| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9470512268618166, | |
| "eval_steps": 300, | |
| "global_step": 3300, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.0028698522026115655, | |
| "grad_norm": 168.79563903808594, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 12.0169, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.005739704405223131, | |
| "grad_norm": 20.983991622924805, | |
| "learning_rate": 4.63768115942029e-05, | |
| "loss": 7.8235, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.008609556607834697, | |
| "grad_norm": 21.168655395507812, | |
| "learning_rate": 7.536231884057971e-05, | |
| "loss": 6.834, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.011479408810446262, | |
| "grad_norm": 21.57039451599121, | |
| "learning_rate": 0.00010434782608695653, | |
| "loss": 3.9023, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.014349261013057828, | |
| "grad_norm": 48.81906509399414, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 2.9802, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.017219113215669393, | |
| "grad_norm": 7.396921157836914, | |
| "learning_rate": 0.00016231884057971017, | |
| "loss": 2.6257, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.02008896541828096, | |
| "grad_norm": 62.19234848022461, | |
| "learning_rate": 0.00019130434782608697, | |
| "loss": 2.3201, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.022958817620892524, | |
| "grad_norm": 8.402580261230469, | |
| "learning_rate": 0.00019999792781461744, | |
| "loss": 2.1749, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.02582866982350409, | |
| "grad_norm": 7.064925670623779, | |
| "learning_rate": 0.0001999877785419313, | |
| "loss": 1.8889, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.028698522026115655, | |
| "grad_norm": 7.678985118865967, | |
| "learning_rate": 0.0001999691724338023, | |
| "loss": 1.8161, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.03156837422872722, | |
| "grad_norm": 9.882554054260254, | |
| "learning_rate": 0.0001999421110639107, | |
| "loss": 1.9209, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.034438226431338786, | |
| "grad_norm": 8.960328102111816, | |
| "learning_rate": 0.00019990659672107177, | |
| "loss": 1.8535, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.03730807863395035, | |
| "grad_norm": 6.723909378051758, | |
| "learning_rate": 0.00019986263240904216, | |
| "loss": 1.7978, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.04017793083656192, | |
| "grad_norm": 14.159058570861816, | |
| "learning_rate": 0.00019981022184626578, | |
| "loss": 1.686, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.04304778303917348, | |
| "grad_norm": 12.402606010437012, | |
| "learning_rate": 0.00019974936946555948, | |
| "loss": 1.6932, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.04591763524178505, | |
| "grad_norm": 7.793806076049805, | |
| "learning_rate": 0.000199680080413738, | |
| "loss": 1.5665, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.048787487444396614, | |
| "grad_norm": 9.647517204284668, | |
| "learning_rate": 0.0001996023605511786, | |
| "loss": 1.5892, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.05165733964700818, | |
| "grad_norm": 7.5883564949035645, | |
| "learning_rate": 0.00019951621645132556, | |
| "loss": 1.5003, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.054527191849619745, | |
| "grad_norm": 9.5863676071167, | |
| "learning_rate": 0.00019942165540013412, | |
| "loss": 1.4324, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.05739704405223131, | |
| "grad_norm": 10.761382102966309, | |
| "learning_rate": 0.00019931868539545416, | |
| "loss": 1.2652, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.060266896254842876, | |
| "grad_norm": 23.32731056213379, | |
| "learning_rate": 0.00019920731514635396, | |
| "loss": 1.2868, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.06313674845745444, | |
| "grad_norm": 15.128023147583008, | |
| "learning_rate": 0.00019908755407238343, | |
| "loss": 1.2272, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.066006600660066, | |
| "grad_norm": 12.924105644226074, | |
| "learning_rate": 0.00019895941230277744, | |
| "loss": 1.307, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.06887645286267757, | |
| "grad_norm": 9.334559440612793, | |
| "learning_rate": 0.00019882290067559915, | |
| "loss": 1.1858, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.07174630506528913, | |
| "grad_norm": 12.918402671813965, | |
| "learning_rate": 0.0001986780307368233, | |
| "loss": 1.1668, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.0746161572679007, | |
| "grad_norm": 8.966814994812012, | |
| "learning_rate": 0.00019852481473935974, | |
| "loss": 1.04, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.07748600947051226, | |
| "grad_norm": 10.825933456420898, | |
| "learning_rate": 0.000198363265642017, | |
| "loss": 1.0674, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.08035586167312384, | |
| "grad_norm": 20.35280418395996, | |
| "learning_rate": 0.00019819339710840626, | |
| "loss": 1.1564, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.0832257138757354, | |
| "grad_norm": 24.500883102416992, | |
| "learning_rate": 0.00019801522350578577, | |
| "loss": 1.0751, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.08609556607834697, | |
| "grad_norm": 8.19206428527832, | |
| "learning_rate": 0.00019782875990384568, | |
| "loss": 1.0476, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.08896541828095852, | |
| "grad_norm": 8.840872764587402, | |
| "learning_rate": 0.00019763402207343338, | |
| "loss": 1.0478, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.0918352704835701, | |
| "grad_norm": 11.326393127441406, | |
| "learning_rate": 0.00019743102648521967, | |
| "loss": 1.0235, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.09470512268618166, | |
| "grad_norm": 15.35113525390625, | |
| "learning_rate": 0.00019721979030830572, | |
| "loss": 0.9794, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.09757497488879323, | |
| "grad_norm": 11.8535795211792, | |
| "learning_rate": 0.0001970003314087709, | |
| "loss": 1.0072, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.10044482709140479, | |
| "grad_norm": 24.779190063476562, | |
| "learning_rate": 0.0001967726683481617, | |
| "loss": 1.0056, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.10331467929401636, | |
| "grad_norm": 20.744426727294922, | |
| "learning_rate": 0.00019653682038192188, | |
| "loss": 1.0066, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.10618453149662792, | |
| "grad_norm": 21.19144630432129, | |
| "learning_rate": 0.00019629280745776364, | |
| "loss": 0.9673, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.10905438369923949, | |
| "grad_norm": 18.140127182006836, | |
| "learning_rate": 0.0001960406502139808, | |
| "loss": 0.9903, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.11192423590185105, | |
| "grad_norm": 19.997053146362305, | |
| "learning_rate": 0.00019578036997770296, | |
| "loss": 0.9715, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.11479408810446262, | |
| "grad_norm": 15.790470123291016, | |
| "learning_rate": 0.0001955119887630919, | |
| "loss": 0.9508, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.11766394030707418, | |
| "grad_norm": 18.330507278442383, | |
| "learning_rate": 0.0001952355292694795, | |
| "loss": 0.9867, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.12053379250968575, | |
| "grad_norm": 13.211642265319824, | |
| "learning_rate": 0.0001949510148794478, | |
| "loss": 1.0481, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.12340364471229731, | |
| "grad_norm": 9.442767143249512, | |
| "learning_rate": 0.00019465846965685158, | |
| "loss": 0.9686, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.12627349691490888, | |
| "grad_norm": 15.597809791564941, | |
| "learning_rate": 0.00019435791834478293, | |
| "loss": 1.0821, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.12914334911752046, | |
| "grad_norm": 13.517879486083984, | |
| "learning_rate": 0.0001940493863634784, | |
| "loss": 0.9397, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.132013201320132, | |
| "grad_norm": 13.031438827514648, | |
| "learning_rate": 0.00019373289980816917, | |
| "loss": 1.0009, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.13488305352274357, | |
| "grad_norm": 14.64666748046875, | |
| "learning_rate": 0.00019340848544687386, | |
| "loss": 0.9571, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.13775290572535515, | |
| "grad_norm": 10.706031799316406, | |
| "learning_rate": 0.00019307617071813454, | |
| "loss": 1.0283, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.14062275792796672, | |
| "grad_norm": 9.723997116088867, | |
| "learning_rate": 0.00019273598372869603, | |
| "loss": 0.9815, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.14349261013057826, | |
| "grad_norm": 9.667860984802246, | |
| "learning_rate": 0.0001923879532511287, | |
| "loss": 0.9424, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.14636246233318984, | |
| "grad_norm": 6.956273078918457, | |
| "learning_rate": 0.00019203210872139476, | |
| "loss": 0.9793, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.1492323145358014, | |
| "grad_norm": 15.395605087280273, | |
| "learning_rate": 0.00019166848023635883, | |
| "loss": 1.0637, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.15210216673841298, | |
| "grad_norm": 23.60310173034668, | |
| "learning_rate": 0.0001912970985512422, | |
| "loss": 0.9625, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.15497201894102453, | |
| "grad_norm": 20.658727645874023, | |
| "learning_rate": 0.00019091799507702181, | |
| "loss": 0.9393, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.1578418711436361, | |
| "grad_norm": 18.22756576538086, | |
| "learning_rate": 0.0001905312018777733, | |
| "loss": 0.9354, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.16071172334624767, | |
| "grad_norm": 11.863499641418457, | |
| "learning_rate": 0.00019013675166795922, | |
| "loss": 0.933, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.16358157554885924, | |
| "grad_norm": 11.65882682800293, | |
| "learning_rate": 0.00018973467780966202, | |
| "loss": 0.9119, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.1664514277514708, | |
| "grad_norm": 11.474069595336914, | |
| "learning_rate": 0.00018932501430976242, | |
| "loss": 0.9511, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.16932127995408236, | |
| "grad_norm": 8.225656509399414, | |
| "learning_rate": 0.00018890779581706303, | |
| "loss": 0.9474, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.17219113215669393, | |
| "grad_norm": 15.780831336975098, | |
| "learning_rate": 0.00018848305761935797, | |
| "loss": 0.9528, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.1750609843593055, | |
| "grad_norm": 9.415815353393555, | |
| "learning_rate": 0.00018805083564044802, | |
| "loss": 0.8619, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.17793083656191705, | |
| "grad_norm": 9.250490188598633, | |
| "learning_rate": 0.0001876111664371025, | |
| "loss": 0.9168, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.18080068876452862, | |
| "grad_norm": 15.730814933776855, | |
| "learning_rate": 0.0001871640871959672, | |
| "loss": 0.94, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.1836705409671402, | |
| "grad_norm": 9.073026657104492, | |
| "learning_rate": 0.0001867096357304191, | |
| "loss": 0.9471, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.18654039316975177, | |
| "grad_norm": 8.982126235961914, | |
| "learning_rate": 0.00018624785047736842, | |
| "loss": 0.9177, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.1894102453723633, | |
| "grad_norm": 10.682122230529785, | |
| "learning_rate": 0.00018577877049400746, | |
| "loss": 0.9402, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.19228009757497488, | |
| "grad_norm": 8.706944465637207, | |
| "learning_rate": 0.0001853024354545073, | |
| "loss": 0.8867, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.19514994977758646, | |
| "grad_norm": 5.8472371101379395, | |
| "learning_rate": 0.00018481888564666208, | |
| "loss": 0.9135, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 0.19801980198019803, | |
| "grad_norm": 5.432713508605957, | |
| "learning_rate": 0.00018432816196848172, | |
| "loss": 0.8525, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 0.20088965418280957, | |
| "grad_norm": 28.993038177490234, | |
| "learning_rate": 0.00018383030592473266, | |
| "loss": 0.8779, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 0.20375950638542115, | |
| "grad_norm": 5.313049793243408, | |
| "learning_rate": 0.0001833253596234274, | |
| "loss": 0.9551, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 0.20662935858803272, | |
| "grad_norm": 18.639175415039062, | |
| "learning_rate": 0.00018281336577226327, | |
| "loss": 0.8694, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 0.2094992107906443, | |
| "grad_norm": 15.578129768371582, | |
| "learning_rate": 0.00018229436767501012, | |
| "loss": 0.9017, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 0.21236906299325584, | |
| "grad_norm": 18.0419864654541, | |
| "learning_rate": 0.0001817684092278477, | |
| "loss": 0.8616, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 0.2152389151958674, | |
| "grad_norm": 8.34323787689209, | |
| "learning_rate": 0.00018123553491565308, | |
| "loss": 0.8902, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.21810876739847898, | |
| "grad_norm": 8.49802017211914, | |
| "learning_rate": 0.00018069578980823816, | |
| "loss": 0.8781, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 0.22097861960109055, | |
| "grad_norm": 6.250750541687012, | |
| "learning_rate": 0.00018014921955653772, | |
| "loss": 0.8405, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 0.2238484718037021, | |
| "grad_norm": 25.283082962036133, | |
| "learning_rate": 0.00017959587038874822, | |
| "loss": 0.93, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 0.22671832400631367, | |
| "grad_norm": 18.443071365356445, | |
| "learning_rate": 0.00017903578910641814, | |
| "loss": 0.9202, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 0.22958817620892524, | |
| "grad_norm": 18.457555770874023, | |
| "learning_rate": 0.0001784690230804892, | |
| "loss": 0.9446, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.23245802841153682, | |
| "grad_norm": 7.786270618438721, | |
| "learning_rate": 0.00017789562024729012, | |
| "loss": 0.899, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 0.23532788061414836, | |
| "grad_norm": 6.527904033660889, | |
| "learning_rate": 0.00017731562910448202, | |
| "loss": 0.8866, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 0.23819773281675993, | |
| "grad_norm": 8.394437789916992, | |
| "learning_rate": 0.00017672909870695665, | |
| "loss": 0.8749, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 0.2410675850193715, | |
| "grad_norm": 6.815917491912842, | |
| "learning_rate": 0.00017613607866268742, | |
| "loss": 0.8542, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 0.24393743722198308, | |
| "grad_norm": 16.42218780517578, | |
| "learning_rate": 0.00017553661912853347, | |
| "loss": 0.8658, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 0.24680728942459462, | |
| "grad_norm": 14.373140335083008, | |
| "learning_rate": 0.00017493077080599768, | |
| "loss": 0.8756, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 0.2496771416272062, | |
| "grad_norm": 17.368059158325195, | |
| "learning_rate": 0.0001743185849369381, | |
| "loss": 0.9572, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 0.25254699382981777, | |
| "grad_norm": 8.744333267211914, | |
| "learning_rate": 0.0001737001132992344, | |
| "loss": 0.8743, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 0.2554168460324293, | |
| "grad_norm": 9.240042686462402, | |
| "learning_rate": 0.0001730754082024082, | |
| "loss": 0.8666, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 0.2582866982350409, | |
| "grad_norm": 8.81686782836914, | |
| "learning_rate": 0.00017244452248319896, | |
| "loss": 0.8771, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 0.26115655043765246, | |
| "grad_norm": 46.30351638793945, | |
| "learning_rate": 0.00017180750950109504, | |
| "loss": 0.788, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 0.264026402640264, | |
| "grad_norm": 6.262620449066162, | |
| "learning_rate": 0.0001711644231338208, | |
| "loss": 0.916, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 0.2668962548428756, | |
| "grad_norm": 7.936816215515137, | |
| "learning_rate": 0.00017051531777277952, | |
| "loss": 0.8425, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 0.26976610704548715, | |
| "grad_norm": 10.233474731445312, | |
| "learning_rate": 0.00016986024831845296, | |
| "loss": 0.9159, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 0.27263595924809875, | |
| "grad_norm": 13.751338958740234, | |
| "learning_rate": 0.00016919927017575832, | |
| "loss": 0.8484, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 0.2755058114507103, | |
| "grad_norm": 18.70934295654297, | |
| "learning_rate": 0.00016853243924936173, | |
| "loss": 0.8387, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 0.27837566365332184, | |
| "grad_norm": 6.2156853675842285, | |
| "learning_rate": 0.0001678598119389502, | |
| "loss": 0.9127, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 0.28124551585593344, | |
| "grad_norm": 10.486414909362793, | |
| "learning_rate": 0.00016718144513446127, | |
| "loss": 0.861, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 0.284115368058545, | |
| "grad_norm": 7.782724380493164, | |
| "learning_rate": 0.00016649739621127146, | |
| "loss": 0.8739, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 0.2869852202611565, | |
| "grad_norm": 30.388168334960938, | |
| "learning_rate": 0.00016580772302534337, | |
| "loss": 0.9009, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.2898550724637681, | |
| "grad_norm": 7.943617343902588, | |
| "learning_rate": 0.0001651124839083324, | |
| "loss": 0.8113, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 0.29272492466637967, | |
| "grad_norm": 8.402076721191406, | |
| "learning_rate": 0.00016441173766265315, | |
| "loss": 0.8076, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 0.29559477686899127, | |
| "grad_norm": 7.3927764892578125, | |
| "learning_rate": 0.00016370554355650584, | |
| "loss": 0.8263, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 0.2984646290716028, | |
| "grad_norm": 8.749371528625488, | |
| "learning_rate": 0.0001629939613188638, | |
| "loss": 0.8673, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 0.30133448127421436, | |
| "grad_norm": 4.924167156219482, | |
| "learning_rate": 0.0001622770511344213, | |
| "loss": 0.869, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 0.30420433347682596, | |
| "grad_norm": 34.14529037475586, | |
| "learning_rate": 0.00016155487363850342, | |
| "loss": 0.9202, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 0.3070741856794375, | |
| "grad_norm": 13.217582702636719, | |
| "learning_rate": 0.00016082748991193757, | |
| "loss": 0.8409, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 0.30994403788204905, | |
| "grad_norm": 19.251298904418945, | |
| "learning_rate": 0.00016009496147588735, | |
| "loss": 0.8624, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 0.31281389008466065, | |
| "grad_norm": 52.710453033447266, | |
| "learning_rate": 0.00015935735028664908, | |
| "loss": 0.8695, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 0.3156837422872722, | |
| "grad_norm": 15.96419906616211, | |
| "learning_rate": 0.00015861471873041184, | |
| "loss": 0.8773, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 0.3185535944898838, | |
| "grad_norm": 7.947400093078613, | |
| "learning_rate": 0.0001578671296179806, | |
| "loss": 0.8387, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 0.32142344669249534, | |
| "grad_norm": 13.167436599731445, | |
| "learning_rate": 0.00015711464617946402, | |
| "loss": 0.8582, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 0.3242932988951069, | |
| "grad_norm": 11.579595565795898, | |
| "learning_rate": 0.00015635733205892653, | |
| "loss": 0.8615, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 0.3271631510977185, | |
| "grad_norm": 4.840546131134033, | |
| "learning_rate": 0.00015559525130900523, | |
| "loss": 0.822, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 0.33003300330033003, | |
| "grad_norm": 8.159014701843262, | |
| "learning_rate": 0.0001548284683854925, | |
| "loss": 0.8512, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 0.3329028555029416, | |
| "grad_norm": 33.13652038574219, | |
| "learning_rate": 0.00015405704814188442, | |
| "loss": 0.8686, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 0.3357727077055532, | |
| "grad_norm": 5.398830890655518, | |
| "learning_rate": 0.00015328105582389557, | |
| "loss": 0.8685, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 0.3386425599081647, | |
| "grad_norm": 23.8563289642334, | |
| "learning_rate": 0.00015250055706394057, | |
| "loss": 0.8617, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 0.3415124121107763, | |
| "grad_norm": 5.886293411254883, | |
| "learning_rate": 0.00015171561787558297, | |
| "loss": 0.8559, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 0.34438226431338786, | |
| "grad_norm": 7.887658596038818, | |
| "learning_rate": 0.000150926304647952, | |
| "loss": 0.8811, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.3472521165159994, | |
| "grad_norm": 6.111181259155273, | |
| "learning_rate": 0.00015013268414012742, | |
| "loss": 0.8297, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 0.350121968718611, | |
| "grad_norm": 6.417325496673584, | |
| "learning_rate": 0.00014933482347549303, | |
| "loss": 0.8296, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 0.35299182092122255, | |
| "grad_norm": 48.331573486328125, | |
| "learning_rate": 0.00014853279013605957, | |
| "loss": 0.7966, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 0.3558616731238341, | |
| "grad_norm": 8.638408660888672, | |
| "learning_rate": 0.00014772665195675718, | |
| "loss": 0.8522, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 0.3587315253264457, | |
| "grad_norm": 6.308197498321533, | |
| "learning_rate": 0.00014691647711969803, | |
| "loss": 0.8228, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.36160137752905724, | |
| "grad_norm": 6.23061990737915, | |
| "learning_rate": 0.0001461023341484094, | |
| "loss": 0.7915, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 0.36447122973166884, | |
| "grad_norm": 6.377804756164551, | |
| "learning_rate": 0.00014528429190203824, | |
| "loss": 0.8486, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 0.3673410819342804, | |
| "grad_norm": 6.146363258361816, | |
| "learning_rate": 0.00014446241956952714, | |
| "loss": 0.8927, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 0.37021093413689193, | |
| "grad_norm": 3.900587320327759, | |
| "learning_rate": 0.0001436367866637622, | |
| "loss": 0.8167, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 0.37308078633950353, | |
| "grad_norm": 8.58018684387207, | |
| "learning_rate": 0.00014280746301569407, | |
| "loss": 0.8128, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 0.3759506385421151, | |
| "grad_norm": 5.754461288452148, | |
| "learning_rate": 0.00014197451876843138, | |
| "loss": 0.8441, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 0.3788204907447266, | |
| "grad_norm": 7.290277004241943, | |
| "learning_rate": 0.00014113802437130845, | |
| "loss": 0.8555, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 0.3816903429473382, | |
| "grad_norm": 43.14801788330078, | |
| "learning_rate": 0.00014029805057392655, | |
| "loss": 0.8299, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 0.38456019514994977, | |
| "grad_norm": 5.909049034118652, | |
| "learning_rate": 0.0001394546684201701, | |
| "loss": 0.8448, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 0.38743004735256137, | |
| "grad_norm": 4.810829162597656, | |
| "learning_rate": 0.00013860794924219782, | |
| "loss": 0.8592, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 0.3902998995551729, | |
| "grad_norm": 6.602210998535156, | |
| "learning_rate": 0.00013775796465440956, | |
| "loss": 0.8351, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 0.39316975175778446, | |
| "grad_norm": 7.952111721038818, | |
| "learning_rate": 0.0001369047865473893, | |
| "loss": 0.8243, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 0.39603960396039606, | |
| "grad_norm": 8.271283149719238, | |
| "learning_rate": 0.00013604848708182466, | |
| "loss": 0.8239, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 0.3989094561630076, | |
| "grad_norm": 12.694669723510742, | |
| "learning_rate": 0.00013518913868240372, | |
| "loss": 0.8381, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 0.40177930836561915, | |
| "grad_norm": 22.169252395629883, | |
| "learning_rate": 0.00013432681403168932, | |
| "loss": 0.8227, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.40464916056823075, | |
| "grad_norm": 127.96073913574219, | |
| "learning_rate": 0.00013346158606397182, | |
| "loss": 0.8376, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 0.4075190127708423, | |
| "grad_norm": 12.16250991821289, | |
| "learning_rate": 0.0001325935279591003, | |
| "loss": 0.8253, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 0.4103888649734539, | |
| "grad_norm": 11.346808433532715, | |
| "learning_rate": 0.00013172271313629315, | |
| "loss": 0.8554, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 0.41325871717606544, | |
| "grad_norm": 18.371610641479492, | |
| "learning_rate": 0.0001308492152479283, | |
| "loss": 0.7743, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 0.416128569378677, | |
| "grad_norm": 17.174100875854492, | |
| "learning_rate": 0.00012997310817331392, | |
| "loss": 0.8342, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 0.4189984215812886, | |
| "grad_norm": 15.853143692016602, | |
| "learning_rate": 0.00012909446601243972, | |
| "loss": 0.8514, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 0.4218682737839001, | |
| "grad_norm": 6.734909534454346, | |
| "learning_rate": 0.00012821336307970965, | |
| "loss": 0.7947, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 0.42473812598651167, | |
| "grad_norm": 7.687751770019531, | |
| "learning_rate": 0.00012732987389765658, | |
| "loss": 0.8249, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 0.4276079781891233, | |
| "grad_norm": 4.791903972625732, | |
| "learning_rate": 0.00012644407319063918, | |
| "loss": 0.7755, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 0.4304778303917348, | |
| "grad_norm": 3.5958361625671387, | |
| "learning_rate": 0.0001255560358785219, | |
| "loss": 0.7828, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.4333476825943464, | |
| "grad_norm": 5.9140400886535645, | |
| "learning_rate": 0.00012466583707033832, | |
| "loss": 0.8044, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 0.43621753479695796, | |
| "grad_norm": 5.575759410858154, | |
| "learning_rate": 0.00012377355205793854, | |
| "loss": 0.7996, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 0.4390873869995695, | |
| "grad_norm": 6.771875381469727, | |
| "learning_rate": 0.00012287925630962107, | |
| "loss": 0.8261, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 0.4419572392021811, | |
| "grad_norm": 18.849271774291992, | |
| "learning_rate": 0.00012198302546374978, | |
| "loss": 0.8224, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 0.44482709140479265, | |
| "grad_norm": 5.645337104797363, | |
| "learning_rate": 0.00012108493532235666, | |
| "loss": 0.8185, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 0.4476969436074042, | |
| "grad_norm": 4.3476481437683105, | |
| "learning_rate": 0.00012018506184473038, | |
| "loss": 0.7985, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 0.4505667958100158, | |
| "grad_norm": 8.391561508178711, | |
| "learning_rate": 0.00011928348114099195, | |
| "loss": 0.7965, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 0.45343664801262734, | |
| "grad_norm": 11.707796096801758, | |
| "learning_rate": 0.00011838026946565723, | |
| "loss": 0.8174, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 0.45630650021523894, | |
| "grad_norm": 9.046381950378418, | |
| "learning_rate": 0.00011747550321118763, | |
| "loss": 0.8, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 0.4591763524178505, | |
| "grad_norm": 8.26490306854248, | |
| "learning_rate": 0.00011656925890152877, | |
| "loss": 0.8229, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.46204620462046203, | |
| "grad_norm": 6.398012638092041, | |
| "learning_rate": 0.00011566161318563821, | |
| "loss": 0.8027, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 0.46491605682307363, | |
| "grad_norm": 5.92479133605957, | |
| "learning_rate": 0.0001147526428310027, | |
| "loss": 0.8094, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 0.4677859090256852, | |
| "grad_norm": 7.79962158203125, | |
| "learning_rate": 0.00011384242471714512, | |
| "loss": 0.8049, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 0.4706557612282967, | |
| "grad_norm": 4.564454078674316, | |
| "learning_rate": 0.00011293103582912221, | |
| "loss": 0.8382, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 0.4735256134309083, | |
| "grad_norm": 20.43712043762207, | |
| "learning_rate": 0.00011201855325101332, | |
| "loss": 0.829, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 0.47639546563351987, | |
| "grad_norm": 5.778446674346924, | |
| "learning_rate": 0.0001111050541594006, | |
| "loss": 0.8333, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 0.47926531783613147, | |
| "grad_norm": 5.030070781707764, | |
| "learning_rate": 0.00011019061581684165, | |
| "loss": 0.769, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 0.482135170038743, | |
| "grad_norm": 5.967840671539307, | |
| "learning_rate": 0.00010927531556533456, | |
| "loss": 0.8041, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 0.48500502224135456, | |
| "grad_norm": 4.707633972167969, | |
| "learning_rate": 0.00010835923081977673, | |
| "loss": 0.8105, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 0.48787487444396616, | |
| "grad_norm": 6.354760646820068, | |
| "learning_rate": 0.0001074424390614169, | |
| "loss": 0.8031, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 0.4907447266465777, | |
| "grad_norm": 6.2033915519714355, | |
| "learning_rate": 0.00010652501783130208, | |
| "loss": 0.7559, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 0.49361457884918924, | |
| "grad_norm": 3.7331125736236572, | |
| "learning_rate": 0.00010560704472371919, | |
| "loss": 0.8233, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 0.49648443105180085, | |
| "grad_norm": 9.511772155761719, | |
| "learning_rate": 0.00010468859737963217, | |
| "loss": 0.7945, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 0.4993542832544124, | |
| "grad_norm": 12.07361125946045, | |
| "learning_rate": 0.00010376975348011533, | |
| "loss": 0.8368, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 0.5022241354570239, | |
| "grad_norm": 4.957511901855469, | |
| "learning_rate": 0.00010285059073978312, | |
| "loss": 0.8241, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.5050939876596355, | |
| "grad_norm": 4.124336242675781, | |
| "learning_rate": 0.00010193118690021699, | |
| "loss": 0.807, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 0.5079638398622471, | |
| "grad_norm": 4.789161205291748, | |
| "learning_rate": 0.00010101161972339046, | |
| "loss": 0.8143, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 0.5108336920648586, | |
| "grad_norm": 5.026962757110596, | |
| "learning_rate": 0.00010009196698509173, | |
| "loss": 0.7765, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 0.5137035442674702, | |
| "grad_norm": 8.285078048706055, | |
| "learning_rate": 9.91723064683458e-05, | |
| "loss": 0.8053, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 0.5165733964700818, | |
| "grad_norm": 4.77803897857666, | |
| "learning_rate": 9.825271595683548e-05, | |
| "loss": 0.8072, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.5194432486726933, | |
| "grad_norm": 4.466314315795898, | |
| "learning_rate": 9.73332732283226e-05, | |
| "loss": 0.7936, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 0.5223131008753049, | |
| "grad_norm": 6.21898078918457, | |
| "learning_rate": 9.641405604806983e-05, | |
| "loss": 0.8018, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 0.5251829530779165, | |
| "grad_norm": 3.505802869796753, | |
| "learning_rate": 9.549514216226311e-05, | |
| "loss": 0.823, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 0.528052805280528, | |
| "grad_norm": 4.254824161529541, | |
| "learning_rate": 9.45766092914363e-05, | |
| "loss": 0.824, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 0.5309226574831396, | |
| "grad_norm": 10.659527778625488, | |
| "learning_rate": 9.365853512389735e-05, | |
| "loss": 0.8169, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 0.5337925096857512, | |
| "grad_norm": 5.28292989730835, | |
| "learning_rate": 9.274099730915778e-05, | |
| "loss": 0.8076, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 0.5366623618883628, | |
| "grad_norm": 5.907596588134766, | |
| "learning_rate": 9.182407345136506e-05, | |
| "loss": 0.7863, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 0.5395322140909743, | |
| "grad_norm": 4.142882347106934, | |
| "learning_rate": 9.090784110273896e-05, | |
| "loss": 0.8133, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 0.5424020662935859, | |
| "grad_norm": 4.616401195526123, | |
| "learning_rate": 8.99923777570124e-05, | |
| "loss": 0.7853, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 0.5452719184961975, | |
| "grad_norm": 7.957604885101318, | |
| "learning_rate": 8.907776084287693e-05, | |
| "loss": 0.8275, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 0.548141770698809, | |
| "grad_norm": 3.326878070831299, | |
| "learning_rate": 8.816406771743412e-05, | |
| "loss": 0.7724, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 0.5510116229014206, | |
| "grad_norm": 4.447857856750488, | |
| "learning_rate": 8.725137565965262e-05, | |
| "loss": 0.8049, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 0.5538814751040322, | |
| "grad_norm": 5.452672004699707, | |
| "learning_rate": 8.633976186383217e-05, | |
| "loss": 0.8034, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 0.5567513273066437, | |
| "grad_norm": 5.054596900939941, | |
| "learning_rate": 8.542930343307444e-05, | |
| "loss": 0.7745, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 0.5596211795092553, | |
| "grad_norm": 25.82883071899414, | |
| "learning_rate": 8.452007737276191e-05, | |
| "loss": 0.7756, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 0.5624910317118669, | |
| "grad_norm": 4.046459197998047, | |
| "learning_rate": 8.361216058404468e-05, | |
| "loss": 0.7597, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 0.5653608839144784, | |
| "grad_norm": 18.29205894470215, | |
| "learning_rate": 8.270562985733652e-05, | |
| "loss": 0.7863, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 0.56823073611709, | |
| "grad_norm": 7.219738006591797, | |
| "learning_rate": 8.180056186581976e-05, | |
| "loss": 0.7651, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 0.5711005883197016, | |
| "grad_norm": 4.146981716156006, | |
| "learning_rate": 8.089703315896058e-05, | |
| "loss": 0.7578, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 0.573970440522313, | |
| "grad_norm": 4.7924675941467285, | |
| "learning_rate": 7.999512015603438e-05, | |
| "loss": 0.7974, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.5768402927249247, | |
| "grad_norm": 5.102847576141357, | |
| "learning_rate": 7.909489913966261e-05, | |
| "loss": 0.805, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 0.5797101449275363, | |
| "grad_norm": 5.353450298309326, | |
| "learning_rate": 7.819644624936051e-05, | |
| "loss": 0.7895, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 0.5825799971301477, | |
| "grad_norm": 5.74714469909668, | |
| "learning_rate": 7.72998374750977e-05, | |
| "loss": 0.8029, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 0.5854498493327593, | |
| "grad_norm": 4.67111873626709, | |
| "learning_rate": 7.640514865087077e-05, | |
| "loss": 0.7763, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 0.5883197015353709, | |
| "grad_norm": 4.226963996887207, | |
| "learning_rate": 7.551245544828944e-05, | |
| "loss": 0.7935, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 0.5911895537379825, | |
| "grad_norm": 6.067037105560303, | |
| "learning_rate": 7.46218333701765e-05, | |
| "loss": 0.7835, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 0.594059405940594, | |
| "grad_norm": 6.7161736488342285, | |
| "learning_rate": 7.373335774418158e-05, | |
| "loss": 0.7793, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 0.5969292581432056, | |
| "grad_norm": 4.633667945861816, | |
| "learning_rate": 7.28471037164103e-05, | |
| "loss": 0.793, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 0.5997991103458172, | |
| "grad_norm": 5.508072376251221, | |
| "learning_rate": 7.196314624506834e-05, | |
| "loss": 0.7589, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 0.6026689625484287, | |
| "grad_norm": 4.465757369995117, | |
| "learning_rate": 7.108156009412176e-05, | |
| "loss": 0.7569, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 0.6055388147510403, | |
| "grad_norm": 3.5824501514434814, | |
| "learning_rate": 7.02024198269733e-05, | |
| "loss": 0.7963, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 0.6084086669536519, | |
| "grad_norm": 8.07539176940918, | |
| "learning_rate": 6.932579980015618e-05, | |
| "loss": 0.8183, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 0.6112785191562634, | |
| "grad_norm": 5.9698615074157715, | |
| "learning_rate": 6.845177415704484e-05, | |
| "loss": 0.749, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 0.614148371358875, | |
| "grad_norm": 4.034762859344482, | |
| "learning_rate": 6.758041682158431e-05, | |
| "loss": 0.7853, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 0.6170182235614866, | |
| "grad_norm": 8.13531494140625, | |
| "learning_rate": 6.671180149203751e-05, | |
| "loss": 0.7871, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 0.6198880757640981, | |
| "grad_norm": 5.809640884399414, | |
| "learning_rate": 6.584600163475222e-05, | |
| "loss": 0.8037, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 0.6227579279667097, | |
| "grad_norm": 5.849427223205566, | |
| "learning_rate": 6.498309047794713e-05, | |
| "loss": 0.8076, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 0.6256277801693213, | |
| "grad_norm": 4.466967582702637, | |
| "learning_rate": 6.412314100551854e-05, | |
| "loss": 0.7863, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 0.6284976323719328, | |
| "grad_norm": 4.934723377227783, | |
| "learning_rate": 6.326622595086722e-05, | |
| "loss": 0.7747, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 0.6313674845745444, | |
| "grad_norm": 4.067635536193848, | |
| "learning_rate": 6.241241779074705e-05, | |
| "loss": 0.7804, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.634237336777156, | |
| "grad_norm": 4.629720687866211, | |
| "learning_rate": 6.156178873913468e-05, | |
| "loss": 0.7672, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 0.6371071889797676, | |
| "grad_norm": 3.9992971420288086, | |
| "learning_rate": 6.071441074112194e-05, | |
| "loss": 0.7856, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 0.6399770411823791, | |
| "grad_norm": 6.1507062911987305, | |
| "learning_rate": 5.9870355466830885e-05, | |
| "loss": 0.752, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 0.6428468933849907, | |
| "grad_norm": 4.305118083953857, | |
| "learning_rate": 5.902969430535186e-05, | |
| "loss": 0.7506, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 0.6457167455876023, | |
| "grad_norm": 3.7307469844818115, | |
| "learning_rate": 5.819249835870566e-05, | |
| "loss": 0.7744, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.6485865977902138, | |
| "grad_norm": 5.391602516174316, | |
| "learning_rate": 5.7358838435829664e-05, | |
| "loss": 0.8067, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 0.6514564499928254, | |
| "grad_norm": 4.221368789672852, | |
| "learning_rate": 5.6528785046589115e-05, | |
| "loss": 0.8257, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 0.654326302195437, | |
| "grad_norm": 5.274345397949219, | |
| "learning_rate": 5.570240839581323e-05, | |
| "loss": 0.7638, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 0.6571961543980485, | |
| "grad_norm": 4.528804779052734, | |
| "learning_rate": 5.487977837735756e-05, | |
| "loss": 0.7805, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 0.6600660066006601, | |
| "grad_norm": 4.387100696563721, | |
| "learning_rate": 5.406096456819234e-05, | |
| "loss": 0.7811, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 0.6629358588032717, | |
| "grad_norm": 5.64663028717041, | |
| "learning_rate": 5.324603622251797e-05, | |
| "loss": 0.771, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 0.6658057110058831, | |
| "grad_norm": 4.328652381896973, | |
| "learning_rate": 5.243506226590722e-05, | |
| "loss": 0.7711, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 0.6686755632084947, | |
| "grad_norm": 4.763848781585693, | |
| "learning_rate": 5.162811128947602e-05, | |
| "loss": 0.7849, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 0.6715454154111064, | |
| "grad_norm": 6.142160892486572, | |
| "learning_rate": 5.082525154408173e-05, | |
| "loss": 0.7587, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 0.6744152676137178, | |
| "grad_norm": 6.3459553718566895, | |
| "learning_rate": 5.002655093455086e-05, | |
| "loss": 0.7762, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 0.6772851198163294, | |
| "grad_norm": 5.520603656768799, | |
| "learning_rate": 4.9232077013935606e-05, | |
| "loss": 0.7854, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 0.680154972018941, | |
| "grad_norm": 3.9489786624908447, | |
| "learning_rate": 4.844189697780033e-05, | |
| "loss": 0.7599, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 0.6830248242215526, | |
| "grad_norm": 5.653624057769775, | |
| "learning_rate": 4.765607765853828e-05, | |
| "loss": 0.7875, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 0.6858946764241641, | |
| "grad_norm": 4.3883957862854, | |
| "learning_rate": 4.6874685519718945e-05, | |
| "loss": 0.7825, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 0.6887645286267757, | |
| "grad_norm": 3.743744134902954, | |
| "learning_rate": 4.60977866504668e-05, | |
| "loss": 0.7796, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.6916343808293873, | |
| "grad_norm": 5.168239593505859, | |
| "learning_rate": 4.5325446759871316e-05, | |
| "loss": 0.7764, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 0.6945042330319988, | |
| "grad_norm": 3.202075958251953, | |
| "learning_rate": 4.455773117142965e-05, | |
| "loss": 0.7483, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 0.6973740852346104, | |
| "grad_norm": 4.126010417938232, | |
| "learning_rate": 4.379470481752139e-05, | |
| "loss": 0.7702, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 0.700243937437222, | |
| "grad_norm": 5.2914509773254395, | |
| "learning_rate": 4.303643223391698e-05, | |
| "loss": 0.7663, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 0.7031137896398335, | |
| "grad_norm": 5.010975360870361, | |
| "learning_rate": 4.2282977554319034e-05, | |
| "loss": 0.7911, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 0.7059836418424451, | |
| "grad_norm": 3.504735231399536, | |
| "learning_rate": 4.153440450493823e-05, | |
| "loss": 0.7452, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 0.7088534940450567, | |
| "grad_norm": 5.5859880447387695, | |
| "learning_rate": 4.0790776399103294e-05, | |
| "loss": 0.758, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 0.7117233462476682, | |
| "grad_norm": 6.027501583099365, | |
| "learning_rate": 4.0052156131906214e-05, | |
| "loss": 0.7945, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 0.7145931984502798, | |
| "grad_norm": 5.546058654785156, | |
| "learning_rate": 3.93186061748824e-05, | |
| "loss": 0.7676, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 0.7174630506528914, | |
| "grad_norm": 4.879994869232178, | |
| "learning_rate": 3.859018857072719e-05, | |
| "loss": 0.7926, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.7203329028555029, | |
| "grad_norm": 4.717655181884766, | |
| "learning_rate": 3.786696492804812e-05, | |
| "loss": 0.7451, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 0.7232027550581145, | |
| "grad_norm": 6.432432174682617, | |
| "learning_rate": 3.714899641615438e-05, | |
| "loss": 0.7938, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 0.7260726072607261, | |
| "grad_norm": 5.008986473083496, | |
| "learning_rate": 3.6436343759882926e-05, | |
| "loss": 0.765, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 0.7289424594633377, | |
| "grad_norm": 7.00074577331543, | |
| "learning_rate": 3.5729067234462785e-05, | |
| "loss": 0.7794, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 0.7318123116659492, | |
| "grad_norm": 6.525863170623779, | |
| "learning_rate": 3.5027226660416736e-05, | |
| "loss": 0.7979, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 0.7346821638685608, | |
| "grad_norm": 5.4863786697387695, | |
| "learning_rate": 3.433088139850193e-05, | |
| "loss": 0.7625, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 0.7375520160711724, | |
| "grad_norm": 3.975086212158203, | |
| "learning_rate": 3.364009034468926e-05, | |
| "loss": 0.7471, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 0.7404218682737839, | |
| "grad_norm": 3.787874460220337, | |
| "learning_rate": 3.2954911925181876e-05, | |
| "loss": 0.7662, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 0.7432917204763955, | |
| "grad_norm": 4.633001804351807, | |
| "learning_rate": 3.2275404091473795e-05, | |
| "loss": 0.774, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 0.7461615726790071, | |
| "grad_norm": 4.832580089569092, | |
| "learning_rate": 3.1601624315448166e-05, | |
| "loss": 0.7749, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.7490314248816186, | |
| "grad_norm": 4.763906955718994, | |
| "learning_rate": 3.0933629584516665e-05, | |
| "loss": 0.7438, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 0.7519012770842302, | |
| "grad_norm": 4.065663814544678, | |
| "learning_rate": 3.027147639679928e-05, | |
| "loss": 0.7546, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 0.7547711292868418, | |
| "grad_norm": 4.496669769287109, | |
| "learning_rate": 2.961522075634604e-05, | |
| "loss": 0.7878, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 0.7576409814894532, | |
| "grad_norm": 3.8822827339172363, | |
| "learning_rate": 2.896491816840008e-05, | |
| "loss": 0.7884, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 0.7605108336920648, | |
| "grad_norm": 4.25615119934082, | |
| "learning_rate": 2.8320623634703147e-05, | |
| "loss": 0.7418, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 0.7633806858946764, | |
| "grad_norm": 4.472879886627197, | |
| "learning_rate": 2.76823916488436e-05, | |
| "loss": 0.7944, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 0.7662505380972879, | |
| "grad_norm": 6.644125938415527, | |
| "learning_rate": 2.705027619164754e-05, | |
| "loss": 0.7525, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 0.7691203902998995, | |
| "grad_norm": 3.8960325717926025, | |
| "learning_rate": 2.6424330726612946e-05, | |
| "loss": 0.748, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 0.7719902425025111, | |
| "grad_norm": 3.907740354537964, | |
| "learning_rate": 2.5804608195388057e-05, | |
| "loss": 0.7686, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 0.7748600947051227, | |
| "grad_norm": 4.432440757751465, | |
| "learning_rate": 2.5191161013293396e-05, | |
| "loss": 0.7671, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 0.7777299469077342, | |
| "grad_norm": 4.681542873382568, | |
| "learning_rate": 2.4584041064888798e-05, | |
| "loss": 0.765, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 0.7805997991103458, | |
| "grad_norm": 4.8185343742370605, | |
| "learning_rate": 2.398329969958486e-05, | |
| "loss": 0.772, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 0.7834696513129574, | |
| "grad_norm": 4.85504150390625, | |
| "learning_rate": 2.3388987727299982e-05, | |
| "loss": 0.7655, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 0.7863395035155689, | |
| "grad_norm": 4.443562030792236, | |
| "learning_rate": 2.2801155414162934e-05, | |
| "loss": 0.7885, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 0.7892093557181805, | |
| "grad_norm": 4.084039211273193, | |
| "learning_rate": 2.221985247826138e-05, | |
| "loss": 0.7679, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.7920792079207921, | |
| "grad_norm": 5.327516555786133, | |
| "learning_rate": 2.164512808543686e-05, | |
| "loss": 0.7704, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 0.7949490601234036, | |
| "grad_norm": 5.7689313888549805, | |
| "learning_rate": 2.1077030845126256e-05, | |
| "loss": 0.7572, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 0.7978189123260152, | |
| "grad_norm": 5.112376689910889, | |
| "learning_rate": 2.0515608806250665e-05, | |
| "loss": 0.7633, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 0.8006887645286268, | |
| "grad_norm": 4.748579502105713, | |
| "learning_rate": 1.996090945315128e-05, | |
| "loss": 0.7757, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 0.8035586167312383, | |
| "grad_norm": 4.38164758682251, | |
| "learning_rate": 1.941297970157344e-05, | |
| "loss": 0.7517, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.8064284689338499, | |
| "grad_norm": 4.2106523513793945, | |
| "learning_rate": 1.8871865894698336e-05, | |
| "loss": 0.7783, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 0.8092983211364615, | |
| "grad_norm": 6.83260440826416, | |
| "learning_rate": 1.8337613799223586e-05, | |
| "loss": 0.758, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 0.812168173339073, | |
| "grad_norm": 4.018373012542725, | |
| "learning_rate": 1.7810268601492164e-05, | |
| "loss": 0.7464, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 0.8150380255416846, | |
| "grad_norm": 5.183018207550049, | |
| "learning_rate": 1.7289874903670677e-05, | |
| "loss": 0.75, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 0.8179078777442962, | |
| "grad_norm": 3.9134421348571777, | |
| "learning_rate": 1.6776476719976974e-05, | |
| "loss": 0.7991, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 0.8207777299469078, | |
| "grad_norm": 5.056222915649414, | |
| "learning_rate": 1.6270117472957534e-05, | |
| "loss": 0.7419, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 0.8236475821495193, | |
| "grad_norm": 4.9499311447143555, | |
| "learning_rate": 1.5770839989814677e-05, | |
| "loss": 0.7927, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 0.8265174343521309, | |
| "grad_norm": 4.165496826171875, | |
| "learning_rate": 1.527868649878451e-05, | |
| "loss": 0.7502, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 0.8293872865547425, | |
| "grad_norm": 5.458337306976318, | |
| "learning_rate": 1.4793698625565122e-05, | |
| "loss": 0.7699, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 0.832257138757354, | |
| "grad_norm": 4.831928253173828, | |
| "learning_rate": 1.4315917389796119e-05, | |
| "loss": 0.7577, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 0.8351269909599656, | |
| "grad_norm": 5.4457221031188965, | |
| "learning_rate": 1.3845383201589057e-05, | |
| "loss": 0.76, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 0.8379968431625772, | |
| "grad_norm": 4.1194586753845215, | |
| "learning_rate": 1.3382135858109735e-05, | |
| "loss": 0.7865, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 0.8408666953651887, | |
| "grad_norm": 4.45517110824585, | |
| "learning_rate": 1.2926214540212155e-05, | |
| "loss": 0.7414, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 0.8437365475678003, | |
| "grad_norm": 4.03952169418335, | |
| "learning_rate": 1.2477657809124631e-05, | |
| "loss": 0.78, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 0.8466063997704119, | |
| "grad_norm": 4.787744998931885, | |
| "learning_rate": 1.2036503603188464e-05, | |
| "loss": 0.7862, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 0.8494762519730233, | |
| "grad_norm": 6.612007141113281, | |
| "learning_rate": 1.1602789234648948e-05, | |
| "loss": 0.7356, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 0.8523461041756349, | |
| "grad_norm": 4.051847457885742, | |
| "learning_rate": 1.1176551386499757e-05, | |
| "loss": 0.7261, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 0.8552159563782465, | |
| "grad_norm": 6.460504055023193, | |
| "learning_rate": 1.0757826109380165e-05, | |
| "loss": 0.7701, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 0.858085808580858, | |
| "grad_norm": 7.030419826507568, | |
| "learning_rate": 1.034664881852614e-05, | |
| "loss": 0.7938, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 0.8609556607834696, | |
| "grad_norm": 6.365281581878662, | |
| "learning_rate": 9.943054290774756e-06, | |
| "loss": 0.7574, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.8638255129860812, | |
| "grad_norm": 5.900289535522461, | |
| "learning_rate": 9.547076661622922e-06, | |
| "loss": 0.7758, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 0.8666953651886928, | |
| "grad_norm": 5.241759777069092, | |
| "learning_rate": 9.15874942234024e-06, | |
| "loss": 0.7805, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 0.8695652173913043, | |
| "grad_norm": 4.609664440155029, | |
| "learning_rate": 8.778105417136395e-06, | |
| "loss": 0.7642, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 0.8724350695939159, | |
| "grad_norm": 6.470444202423096, | |
| "learning_rate": 8.405176840383122e-06, | |
| "loss": 0.7928, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 0.8753049217965275, | |
| "grad_norm": 3.531794786453247, | |
| "learning_rate": 8.039995233891362e-06, | |
| "loss": 0.7503, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 0.878174773999139, | |
| "grad_norm": 5.537559986114502, | |
| "learning_rate": 7.682591484243417e-06, | |
| "loss": 0.7343, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 0.8810446262017506, | |
| "grad_norm": 3.7967238426208496, | |
| "learning_rate": 7.332995820180677e-06, | |
| "loss": 0.7345, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 0.8839144784043622, | |
| "grad_norm": 4.1268839836120605, | |
| "learning_rate": 6.991237810046847e-06, | |
| "loss": 0.7557, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 0.8867843306069737, | |
| "grad_norm": 7.182312965393066, | |
| "learning_rate": 6.6573463592871085e-06, | |
| "loss": 0.7635, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 0.8896541828095853, | |
| "grad_norm": 3.4768388271331787, | |
| "learning_rate": 6.331349708003365e-06, | |
| "loss": 0.7325, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 0.8925240350121969, | |
| "grad_norm": 5.252262115478516, | |
| "learning_rate": 6.013275428565712e-06, | |
| "loss": 0.7513, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 0.8953938872148084, | |
| "grad_norm": 4.213047027587891, | |
| "learning_rate": 5.703150423280401e-06, | |
| "loss": 0.7685, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 0.89826373941742, | |
| "grad_norm": 4.207084655761719, | |
| "learning_rate": 5.401000922114485e-06, | |
| "loss": 0.7313, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 0.9011335916200316, | |
| "grad_norm": 6.862100124359131, | |
| "learning_rate": 5.10685248047732e-06, | |
| "loss": 0.7626, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 0.9040034438226431, | |
| "grad_norm": 3.541048049926758, | |
| "learning_rate": 4.82072997705908e-06, | |
| "loss": 0.7748, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 0.9068732960252547, | |
| "grad_norm": 4.149963855743408, | |
| "learning_rate": 4.542657611726664e-06, | |
| "loss": 0.7651, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 0.9097431482278663, | |
| "grad_norm": 6.455443859100342, | |
| "learning_rate": 4.272658903476745e-06, | |
| "loss": 0.7769, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 0.9126130004304779, | |
| "grad_norm": 5.111416339874268, | |
| "learning_rate": 4.010756688446726e-06, | |
| "loss": 0.779, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 0.9154828526330894, | |
| "grad_norm": 5.0384440422058105, | |
| "learning_rate": 3.7569731179831537e-06, | |
| "loss": 0.7353, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 0.918352704835701, | |
| "grad_norm": 4.619420528411865, | |
| "learning_rate": 3.5113296567682476e-06, | |
| "loss": 0.7686, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.9212225570383126, | |
| "grad_norm": 5.13969612121582, | |
| "learning_rate": 3.2738470810044553e-06, | |
| "loss": 0.7475, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 0.9240924092409241, | |
| "grad_norm": 4.138948917388916, | |
| "learning_rate": 3.0445454766572235e-06, | |
| "loss": 0.743, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 0.9269622614435357, | |
| "grad_norm": 3.4994235038757324, | |
| "learning_rate": 2.8234442377561232e-06, | |
| "loss": 0.7491, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 0.9298321136461473, | |
| "grad_norm": 3.714160442352295, | |
| "learning_rate": 2.6105620647545734e-06, | |
| "loss": 0.7516, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 0.9327019658487588, | |
| "grad_norm": 3.1646008491516113, | |
| "learning_rate": 2.4059169629481403e-06, | |
| "loss": 0.751, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.9355718180513704, | |
| "grad_norm": 4.828333377838135, | |
| "learning_rate": 2.209526240951665e-06, | |
| "loss": 0.741, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 0.938441670253982, | |
| "grad_norm": 3.3315179347991943, | |
| "learning_rate": 2.021406509235402e-06, | |
| "loss": 0.7554, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 0.9413115224565934, | |
| "grad_norm": 6.141576766967773, | |
| "learning_rate": 1.8415736787200433e-06, | |
| "loss": 0.7465, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 0.944181374659205, | |
| "grad_norm": 4.839749336242676, | |
| "learning_rate": 1.6700429594310063e-06, | |
| "loss": 0.761, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 0.9470512268618166, | |
| "grad_norm": 4.683228969573975, | |
| "learning_rate": 1.5068288592120283e-06, | |
| "loss": 0.751, | |
| "step": 3300 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 3485, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 300, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.35032131289088e+20, | |
| "train_batch_size": 6, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |