{
"best_global_step": 4284,
"best_metric": 0.9999124458258547,
"best_model_checkpoint": "./aynur_model3/checkpoint-4284",
"epoch": 1.0,
"eval_steps": 500,
"global_step": 4284,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.00023346757719021772,
"grad_norm": 3.623156785964966,
"learning_rate": 0.0,
"loss": 0.6427,
"step": 1
},
{
"epoch": 0.002334675771902177,
"grad_norm": 1.9220610857009888,
"learning_rate": 8.391608391608393e-06,
"loss": 0.6315,
"step": 10
},
{
"epoch": 0.004669351543804354,
"grad_norm": 2.1033425331115723,
"learning_rate": 1.7715617715617717e-05,
"loss": 0.4519,
"step": 20
},
{
"epoch": 0.007004027315706532,
"grad_norm": 0.2185792475938797,
"learning_rate": 2.7039627039627042e-05,
"loss": 0.0917,
"step": 30
},
{
"epoch": 0.009338703087608709,
"grad_norm": 0.013207816518843174,
"learning_rate": 3.6363636363636364e-05,
"loss": 0.0019,
"step": 40
},
{
"epoch": 0.011673378859510886,
"grad_norm": 0.005016999784857035,
"learning_rate": 4.568764568764569e-05,
"loss": 0.0002,
"step": 50
},
{
"epoch": 0.014008054631413063,
"grad_norm": 0.004350466653704643,
"learning_rate": 5.314685314685315e-05,
"loss": 0.0264,
"step": 60
},
{
"epoch": 0.01634273040331524,
"grad_norm": 0.0037658039946109056,
"learning_rate": 6.247086247086247e-05,
"loss": 0.0609,
"step": 70
},
{
"epoch": 0.018677406175217418,
"grad_norm": 0.003915839828550816,
"learning_rate": 7.17948717948718e-05,
"loss": 0.0008,
"step": 80
},
{
"epoch": 0.021012081947119593,
"grad_norm": 0.018606621772050858,
"learning_rate": 8.111888111888112e-05,
"loss": 0.0134,
"step": 90
},
{
"epoch": 0.023346757719021772,
"grad_norm": 0.005168155301362276,
"learning_rate": 9.044289044289046e-05,
"loss": 0.0012,
"step": 100
},
{
"epoch": 0.025681433490923947,
"grad_norm": 0.004493937361985445,
"learning_rate": 9.976689976689977e-05,
"loss": 0.0035,
"step": 110
},
{
"epoch": 0.028016109262826126,
"grad_norm": 0.006067783106118441,
"learning_rate": 0.00010909090909090909,
"loss": 0.0091,
"step": 120
},
{
"epoch": 0.030350785034728302,
"grad_norm": 0.017138389870524406,
"learning_rate": 0.00011841491841491842,
"loss": 0.0003,
"step": 130
},
{
"epoch": 0.03268546080663048,
"grad_norm": 0.06054692715406418,
"learning_rate": 0.00012773892773892774,
"loss": 0.0036,
"step": 140
},
{
"epoch": 0.03502013657853266,
"grad_norm": 0.09328058362007141,
"learning_rate": 0.00013706293706293706,
"loss": 0.1175,
"step": 150
},
{
"epoch": 0.037354812350434835,
"grad_norm": 0.006551404017955065,
"learning_rate": 0.00014638694638694638,
"loss": 0.0009,
"step": 160
},
{
"epoch": 0.03968948812233701,
"grad_norm": 0.08505500108003616,
"learning_rate": 0.0001557109557109557,
"loss": 0.0159,
"step": 170
},
{
"epoch": 0.042024163894239186,
"grad_norm": 0.007050831336528063,
"learning_rate": 0.00016503496503496504,
"loss": 0.0008,
"step": 180
},
{
"epoch": 0.04435883966614136,
"grad_norm": 0.031755417585372925,
"learning_rate": 0.00017435897435897436,
"loss": 0.05,
"step": 190
},
{
"epoch": 0.046693515438043544,
"grad_norm": 0.06047971546649933,
"learning_rate": 0.00018368298368298368,
"loss": 0.0186,
"step": 200
},
{
"epoch": 0.04902819120994572,
"grad_norm": 0.11913339048624039,
"learning_rate": 0.000193006993006993,
"loss": 0.0093,
"step": 210
},
{
"epoch": 0.051362866981847895,
"grad_norm": 0.006012667436152697,
"learning_rate": 0.00020233100233100232,
"loss": 0.0105,
"step": 220
},
{
"epoch": 0.05369754275375007,
"grad_norm": 0.006747289560735226,
"learning_rate": 0.00021165501165501164,
"loss": 0.0003,
"step": 230
},
{
"epoch": 0.05603221852565225,
"grad_norm": 5.616020202636719,
"learning_rate": 0.00022097902097902096,
"loss": 0.0484,
"step": 240
},
{
"epoch": 0.05836689429755443,
"grad_norm": 0.04582913592457771,
"learning_rate": 0.00023030303030303033,
"loss": 0.0243,
"step": 250
},
{
"epoch": 0.060701570069456603,
"grad_norm": 5.976644992828369,
"learning_rate": 0.00023962703962703965,
"loss": 0.0437,
"step": 260
},
{
"epoch": 0.06303624584135878,
"grad_norm": 0.006856445223093033,
"learning_rate": 0.00024895104895104897,
"loss": 0.0933,
"step": 270
},
{
"epoch": 0.06537092161326095,
"grad_norm": 0.031221158802509308,
"learning_rate": 0.0002582750582750583,
"loss": 0.015,
"step": 280
},
{
"epoch": 0.06770559738516313,
"grad_norm": 0.5157426595687866,
"learning_rate": 0.0002675990675990676,
"loss": 0.0367,
"step": 290
},
{
"epoch": 0.07004027315706532,
"grad_norm": 0.011151552200317383,
"learning_rate": 0.00027692307692307695,
"loss": 0.0501,
"step": 300
},
{
"epoch": 0.0723749489289675,
"grad_norm": 0.03010399080812931,
"learning_rate": 0.00028624708624708624,
"loss": 0.0472,
"step": 310
},
{
"epoch": 0.07470962470086967,
"grad_norm": 0.044886477291584015,
"learning_rate": 0.0002955710955710956,
"loss": 0.0259,
"step": 320
},
{
"epoch": 0.07704430047277185,
"grad_norm": 2.6834709644317627,
"learning_rate": 0.0003048951048951049,
"loss": 0.0336,
"step": 330
},
{
"epoch": 0.07937897624467402,
"grad_norm": 0.021105894818902016,
"learning_rate": 0.0003142191142191143,
"loss": 0.009,
"step": 340
},
{
"epoch": 0.0817136520165762,
"grad_norm": 0.0037551075220108032,
"learning_rate": 0.00032354312354312357,
"loss": 0.0003,
"step": 350
},
{
"epoch": 0.08404832778847837,
"grad_norm": 0.001763952779583633,
"learning_rate": 0.0003328671328671329,
"loss": 0.0001,
"step": 360
},
{
"epoch": 0.08638300356038055,
"grad_norm": 0.10732467472553253,
"learning_rate": 0.0003421911421911422,
"loss": 0.0161,
"step": 370
},
{
"epoch": 0.08871767933228272,
"grad_norm": 0.028179295361042023,
"learning_rate": 0.00035151515151515155,
"loss": 0.0233,
"step": 380
},
{
"epoch": 0.09105235510418491,
"grad_norm": 1.2709873914718628,
"learning_rate": 0.00036083916083916084,
"loss": 0.0281,
"step": 390
},
{
"epoch": 0.09338703087608709,
"grad_norm": 0.6180899143218994,
"learning_rate": 0.0003701631701631702,
"loss": 0.041,
"step": 400
},
{
"epoch": 0.09572170664798926,
"grad_norm": 2.0692641735076904,
"learning_rate": 0.0003794871794871795,
"loss": 0.1022,
"step": 410
},
{
"epoch": 0.09805638241989144,
"grad_norm": 0.06485776603221893,
"learning_rate": 0.0003888111888111888,
"loss": 0.0525,
"step": 420
},
{
"epoch": 0.10039105819179361,
"grad_norm": 0.03012872114777565,
"learning_rate": 0.00039813519813519817,
"loss": 0.011,
"step": 430
},
{
"epoch": 0.10272573396369579,
"grad_norm": 4.067958354949951,
"learning_rate": 0.0003991699092088197,
"loss": 0.0437,
"step": 440
},
{
"epoch": 0.10506040973559796,
"grad_norm": 0.11274830996990204,
"learning_rate": 0.0003981322957198444,
"loss": 0.0304,
"step": 450
},
{
"epoch": 0.10739508550750014,
"grad_norm": 1.0862525701522827,
"learning_rate": 0.00039709468223086904,
"loss": 0.0078,
"step": 460
},
{
"epoch": 0.10972976127940233,
"grad_norm": 1.328466773033142,
"learning_rate": 0.0003960570687418937,
"loss": 0.0373,
"step": 470
},
{
"epoch": 0.1120644370513045,
"grad_norm": 0.19725392758846283,
"learning_rate": 0.00039501945525291835,
"loss": 0.0802,
"step": 480
},
{
"epoch": 0.11439911282320668,
"grad_norm": 1.5255461931228638,
"learning_rate": 0.00039398184176394295,
"loss": 0.0357,
"step": 490
},
{
"epoch": 0.11673378859510886,
"grad_norm": 0.059472762048244476,
"learning_rate": 0.00039294422827496756,
"loss": 0.0269,
"step": 500
},
{
"epoch": 0.11906846436701103,
"grad_norm": 0.14000196754932404,
"learning_rate": 0.0003919066147859922,
"loss": 0.0142,
"step": 510
},
{
"epoch": 0.12140314013891321,
"grad_norm": 0.012143092229962349,
"learning_rate": 0.00039086900129701687,
"loss": 0.03,
"step": 520
},
{
"epoch": 0.12373781591081538,
"grad_norm": 0.5115292072296143,
"learning_rate": 0.00038983138780804153,
"loss": 0.0342,
"step": 530
},
{
"epoch": 0.12607249168271756,
"grad_norm": 5.99329137802124,
"learning_rate": 0.0003887937743190662,
"loss": 0.0446,
"step": 540
},
{
"epoch": 0.12840716745461975,
"grad_norm": 0.1291157752275467,
"learning_rate": 0.0003877561608300908,
"loss": 0.0161,
"step": 550
},
{
"epoch": 0.1307418432265219,
"grad_norm": 1.0032269954681396,
"learning_rate": 0.00038671854734111544,
"loss": 0.0166,
"step": 560
},
{
"epoch": 0.1330765189984241,
"grad_norm": 16.30780601501465,
"learning_rate": 0.0003856809338521401,
"loss": 0.1032,
"step": 570
},
{
"epoch": 0.13541119477032626,
"grad_norm": 4.544622421264648,
"learning_rate": 0.00038464332036316476,
"loss": 0.0279,
"step": 580
},
{
"epoch": 0.13774587054222845,
"grad_norm": 5.893352508544922,
"learning_rate": 0.0003836057068741894,
"loss": 0.0304,
"step": 590
},
{
"epoch": 0.14008054631413064,
"grad_norm": 0.32387709617614746,
"learning_rate": 0.000382568093385214,
"loss": 0.0404,
"step": 600
},
{
"epoch": 0.1424152220860328,
"grad_norm": 0.051759008318185806,
"learning_rate": 0.0003815304798962387,
"loss": 0.0182,
"step": 610
},
{
"epoch": 0.144749897857935,
"grad_norm": 0.09647126495838165,
"learning_rate": 0.0003804928664072633,
"loss": 0.019,
"step": 620
},
{
"epoch": 0.14708457362983715,
"grad_norm": 0.1588832288980484,
"learning_rate": 0.00037945525291828793,
"loss": 0.016,
"step": 630
},
{
"epoch": 0.14941924940173934,
"grad_norm": 0.008694116957485676,
"learning_rate": 0.0003784176394293126,
"loss": 0.0042,
"step": 640
},
{
"epoch": 0.1517539251736415,
"grad_norm": 0.040139373391866684,
"learning_rate": 0.00037738002594033725,
"loss": 0.0201,
"step": 650
},
{
"epoch": 0.1540886009455437,
"grad_norm": 0.032652173191308975,
"learning_rate": 0.0003763424124513619,
"loss": 0.0127,
"step": 660
},
{
"epoch": 0.15642327671744585,
"grad_norm": 0.027006104588508606,
"learning_rate": 0.0003753047989623865,
"loss": 0.0044,
"step": 670
},
{
"epoch": 0.15875795248934804,
"grad_norm": 2.665090799331665,
"learning_rate": 0.00037426718547341117,
"loss": 0.0179,
"step": 680
},
{
"epoch": 0.16109262826125023,
"grad_norm": 0.011558118276298046,
"learning_rate": 0.0003732295719844358,
"loss": 0.0132,
"step": 690
},
{
"epoch": 0.1634273040331524,
"grad_norm": 0.013224626891314983,
"learning_rate": 0.0003721919584954605,
"loss": 0.0044,
"step": 700
},
{
"epoch": 0.16576197980505458,
"grad_norm": 0.03626665472984314,
"learning_rate": 0.00037115434500648514,
"loss": 0.0019,
"step": 710
},
{
"epoch": 0.16809665557695674,
"grad_norm": 0.10942396521568298,
"learning_rate": 0.00037011673151750974,
"loss": 0.0472,
"step": 720
},
{
"epoch": 0.17043133134885893,
"grad_norm": 0.8933264017105103,
"learning_rate": 0.0003690791180285344,
"loss": 0.0148,
"step": 730
},
{
"epoch": 0.1727660071207611,
"grad_norm": 0.06506644189357758,
"learning_rate": 0.000368041504539559,
"loss": 0.0249,
"step": 740
},
{
"epoch": 0.17510068289266328,
"grad_norm": 0.16546858847141266,
"learning_rate": 0.00036700389105058366,
"loss": 0.0105,
"step": 750
},
{
"epoch": 0.17743535866456545,
"grad_norm": 0.032960060983896255,
"learning_rate": 0.0003659662775616083,
"loss": 0.0022,
"step": 760
},
{
"epoch": 0.17977003443646764,
"grad_norm": 0.04502630606293678,
"learning_rate": 0.00036492866407263297,
"loss": 0.0234,
"step": 770
},
{
"epoch": 0.18210471020836982,
"grad_norm": 0.018004219979047775,
"learning_rate": 0.0003638910505836576,
"loss": 0.0024,
"step": 780
},
{
"epoch": 0.18443938598027199,
"grad_norm": 0.026385951787233353,
"learning_rate": 0.00036285343709468223,
"loss": 0.0079,
"step": 790
},
{
"epoch": 0.18677406175217418,
"grad_norm": 0.02292817272245884,
"learning_rate": 0.0003618158236057069,
"loss": 0.0113,
"step": 800
},
{
"epoch": 0.18910873752407634,
"grad_norm": 0.01237889751791954,
"learning_rate": 0.00036077821011673154,
"loss": 0.01,
"step": 810
},
{
"epoch": 0.19144341329597853,
"grad_norm": 0.011882675811648369,
"learning_rate": 0.0003597405966277562,
"loss": 0.0067,
"step": 820
},
{
"epoch": 0.1937780890678807,
"grad_norm": 0.021467048674821854,
"learning_rate": 0.00035870298313878086,
"loss": 0.0138,
"step": 830
},
{
"epoch": 0.19611276483978288,
"grad_norm": 0.04117121547460556,
"learning_rate": 0.00035766536964980546,
"loss": 0.0131,
"step": 840
},
{
"epoch": 0.19844744061168504,
"grad_norm": 0.04125780984759331,
"learning_rate": 0.0003566277561608301,
"loss": 0.0063,
"step": 850
},
{
"epoch": 0.20078211638358723,
"grad_norm": 0.01064964011311531,
"learning_rate": 0.0003555901426718547,
"loss": 0.0068,
"step": 860
},
{
"epoch": 0.20311679215548942,
"grad_norm": 0.18367743492126465,
"learning_rate": 0.0003545525291828794,
"loss": 0.0197,
"step": 870
},
{
"epoch": 0.20545146792739158,
"grad_norm": 0.017399262636899948,
"learning_rate": 0.00035351491569390403,
"loss": 0.0048,
"step": 880
},
{
"epoch": 0.20778614369929377,
"grad_norm": 0.04123668745160103,
"learning_rate": 0.0003524773022049287,
"loss": 0.012,
"step": 890
},
{
"epoch": 0.21012081947119593,
"grad_norm": 0.021881213411688805,
"learning_rate": 0.0003514396887159533,
"loss": 0.002,
"step": 900
},
{
"epoch": 0.21245549524309812,
"grad_norm": 0.7196763753890991,
"learning_rate": 0.00035040207522697795,
"loss": 0.0053,
"step": 910
},
{
"epoch": 0.21479017101500028,
"grad_norm": 0.006894146092236042,
"learning_rate": 0.0003493644617380026,
"loss": 0.0004,
"step": 920
},
{
"epoch": 0.21712484678690247,
"grad_norm": 0.02064809761941433,
"learning_rate": 0.00034832684824902726,
"loss": 0.0022,
"step": 930
},
{
"epoch": 0.21945952255880466,
"grad_norm": 0.0018330852035433054,
"learning_rate": 0.0003472892347600519,
"loss": 0.0008,
"step": 940
},
{
"epoch": 0.22179419833070682,
"grad_norm": 0.00754689471796155,
"learning_rate": 0.0003462516212710766,
"loss": 0.0356,
"step": 950
},
{
"epoch": 0.224128874102609,
"grad_norm": 0.0938534140586853,
"learning_rate": 0.0003452140077821012,
"loss": 0.0021,
"step": 960
},
{
"epoch": 0.22646354987451117,
"grad_norm": 0.043399691581726074,
"learning_rate": 0.00034417639429312584,
"loss": 0.0056,
"step": 970
},
{
"epoch": 0.22879822564641336,
"grad_norm": 0.014040129259228706,
"learning_rate": 0.00034313878080415044,
"loss": 0.0006,
"step": 980
},
{
"epoch": 0.23113290141831552,
"grad_norm": 0.003576503833755851,
"learning_rate": 0.0003421011673151751,
"loss": 0.0007,
"step": 990
},
{
"epoch": 0.2334675771902177,
"grad_norm": 0.0051997085101902485,
"learning_rate": 0.00034106355382619976,
"loss": 0.0001,
"step": 1000
},
{
"epoch": 0.23580225296211987,
"grad_norm": 0.0035423666704446077,
"learning_rate": 0.0003400259403372244,
"loss": 0.011,
"step": 1010
},
{
"epoch": 0.23813692873402206,
"grad_norm": 0.009176980704069138,
"learning_rate": 0.000338988326848249,
"loss": 0.0129,
"step": 1020
},
{
"epoch": 0.24047160450592425,
"grad_norm": 1.0268243551254272,
"learning_rate": 0.00033795071335927367,
"loss": 0.0128,
"step": 1030
},
{
"epoch": 0.24280628027782641,
"grad_norm": 0.15778960287570953,
"learning_rate": 0.00033691309987029833,
"loss": 0.0025,
"step": 1040
},
{
"epoch": 0.2451409560497286,
"grad_norm": 0.014147180132567883,
"learning_rate": 0.000335875486381323,
"loss": 0.0012,
"step": 1050
},
{
"epoch": 0.24747563182163076,
"grad_norm": 0.6503289341926575,
"learning_rate": 0.00033483787289234764,
"loss": 0.0077,
"step": 1060
},
{
"epoch": 0.24981030759353295,
"grad_norm": 0.036625299602746964,
"learning_rate": 0.0003338002594033723,
"loss": 0.0066,
"step": 1070
},
{
"epoch": 0.2521449833654351,
"grad_norm": 0.05064311996102333,
"learning_rate": 0.0003327626459143969,
"loss": 0.0132,
"step": 1080
},
{
"epoch": 0.2544796591373373,
"grad_norm": 0.01040785014629364,
"learning_rate": 0.00033172503242542156,
"loss": 0.0022,
"step": 1090
},
{
"epoch": 0.2568143349092395,
"grad_norm": 0.0481790155172348,
"learning_rate": 0.00033068741893644616,
"loss": 0.0169,
"step": 1100
},
{
"epoch": 0.25914901068114166,
"grad_norm": 0.04906298220157623,
"learning_rate": 0.0003296498054474708,
"loss": 0.0122,
"step": 1110
},
{
"epoch": 0.2614836864530438,
"grad_norm": 0.010882526636123657,
"learning_rate": 0.0003286121919584955,
"loss": 0.01,
"step": 1120
},
{
"epoch": 0.26381836222494603,
"grad_norm": 0.05711141228675842,
"learning_rate": 0.00032757457846952013,
"loss": 0.0017,
"step": 1130
},
{
"epoch": 0.2661530379968482,
"grad_norm": 0.578333854675293,
"learning_rate": 0.00032653696498054474,
"loss": 0.0067,
"step": 1140
},
{
"epoch": 0.26848771376875036,
"grad_norm": 0.01890755444765091,
"learning_rate": 0.0003254993514915694,
"loss": 0.004,
"step": 1150
},
{
"epoch": 0.2708223895406525,
"grad_norm": 0.07548126578330994,
"learning_rate": 0.00032446173800259405,
"loss": 0.0018,
"step": 1160
},
{
"epoch": 0.27315706531255474,
"grad_norm": 0.13876883685588837,
"learning_rate": 0.0003234241245136187,
"loss": 0.0002,
"step": 1170
},
{
"epoch": 0.2754917410844569,
"grad_norm": 0.0030123014003038406,
"learning_rate": 0.00032238651102464336,
"loss": 0.0001,
"step": 1180
},
{
"epoch": 0.27782641685635906,
"grad_norm": 0.00535109406337142,
"learning_rate": 0.00032134889753566797,
"loss": 0.0072,
"step": 1190
},
{
"epoch": 0.2801610926282613,
"grad_norm": 0.05122077465057373,
"learning_rate": 0.0003203112840466926,
"loss": 0.0016,
"step": 1200
},
{
"epoch": 0.28249576840016344,
"grad_norm": 0.003601687727496028,
"learning_rate": 0.0003192736705577173,
"loss": 0.0003,
"step": 1210
},
{
"epoch": 0.2848304441720656,
"grad_norm": 0.001865709782578051,
"learning_rate": 0.0003182360570687419,
"loss": 0.0001,
"step": 1220
},
{
"epoch": 0.28716511994396776,
"grad_norm": 0.0040197898633778095,
"learning_rate": 0.00031719844357976654,
"loss": 0.0,
"step": 1230
},
{
"epoch": 0.28949979571587,
"grad_norm": 1.5420883893966675,
"learning_rate": 0.0003161608300907912,
"loss": 0.0048,
"step": 1240
},
{
"epoch": 0.29183447148777214,
"grad_norm": 0.6053478717803955,
"learning_rate": 0.00031512321660181585,
"loss": 0.0023,
"step": 1250
},
{
"epoch": 0.2941691472596743,
"grad_norm": 0.008091798983514309,
"learning_rate": 0.00031408560311284046,
"loss": 0.0011,
"step": 1260
},
{
"epoch": 0.29650382303157646,
"grad_norm": 0.027423491701483727,
"learning_rate": 0.0003130479896238651,
"loss": 0.002,
"step": 1270
},
{
"epoch": 0.2988384988034787,
"grad_norm": 0.008556556887924671,
"learning_rate": 0.00031201037613488977,
"loss": 0.0204,
"step": 1280
},
{
"epoch": 0.30117317457538084,
"grad_norm": 0.02086860127747059,
"learning_rate": 0.00031097276264591443,
"loss": 0.001,
"step": 1290
},
{
"epoch": 0.303507850347283,
"grad_norm": 0.016817396506667137,
"learning_rate": 0.0003099351491569391,
"loss": 0.0005,
"step": 1300
},
{
"epoch": 0.3058425261191852,
"grad_norm": 0.038000259548425674,
"learning_rate": 0.0003088975356679637,
"loss": 0.0136,
"step": 1310
},
{
"epoch": 0.3081772018910874,
"grad_norm": 0.08027376979589462,
"learning_rate": 0.00030785992217898834,
"loss": 0.0015,
"step": 1320
},
{
"epoch": 0.31051187766298954,
"grad_norm": 0.009664513170719147,
"learning_rate": 0.000306822308690013,
"loss": 0.0007,
"step": 1330
},
{
"epoch": 0.3128465534348917,
"grad_norm": 0.004226653836667538,
"learning_rate": 0.0003057846952010376,
"loss": 0.0002,
"step": 1340
},
{
"epoch": 0.3151812292067939,
"grad_norm": 0.0029185679741203785,
"learning_rate": 0.00030474708171206226,
"loss": 0.0006,
"step": 1350
},
{
"epoch": 0.3175159049786961,
"grad_norm": 0.13733680546283722,
"learning_rate": 0.0003037094682230869,
"loss": 0.0002,
"step": 1360
},
{
"epoch": 0.31985058075059825,
"grad_norm": 0.0015662899240851402,
"learning_rate": 0.0003026718547341116,
"loss": 0.0,
"step": 1370
},
{
"epoch": 0.32218525652250046,
"grad_norm": 0.009845585562288761,
"learning_rate": 0.0003016342412451362,
"loss": 0.0038,
"step": 1380
},
{
"epoch": 0.3245199322944026,
"grad_norm": 0.0025016157887876034,
"learning_rate": 0.00030059662775616084,
"loss": 0.0023,
"step": 1390
},
{
"epoch": 0.3268546080663048,
"grad_norm": 0.0021448610350489616,
"learning_rate": 0.0002995590142671855,
"loss": 0.0014,
"step": 1400
},
{
"epoch": 0.32918928383820695,
"grad_norm": 0.0016592498868703842,
"learning_rate": 0.00029852140077821015,
"loss": 0.0031,
"step": 1410
},
{
"epoch": 0.33152395961010916,
"grad_norm": 0.0032906723208725452,
"learning_rate": 0.0002974837872892348,
"loss": 0.0088,
"step": 1420
},
{
"epoch": 0.3338586353820113,
"grad_norm": 0.007907208986580372,
"learning_rate": 0.0002964461738002594,
"loss": 0.0045,
"step": 1430
},
{
"epoch": 0.3361933111539135,
"grad_norm": 0.004995182156562805,
"learning_rate": 0.00029540856031128407,
"loss": 0.0009,
"step": 1440
},
{
"epoch": 0.3385279869258157,
"grad_norm": 0.004081010818481445,
"learning_rate": 0.0002943709468223087,
"loss": 0.0004,
"step": 1450
},
{
"epoch": 0.34086266269771787,
"grad_norm": 0.003877257462590933,
"learning_rate": 0.0002933333333333333,
"loss": 0.0012,
"step": 1460
},
{
"epoch": 0.34319733846962003,
"grad_norm": 0.002449814695864916,
"learning_rate": 0.000292295719844358,
"loss": 0.0002,
"step": 1470
},
{
"epoch": 0.3455320142415222,
"grad_norm": 0.002753973240032792,
"learning_rate": 0.00029125810635538264,
"loss": 0.0002,
"step": 1480
},
{
"epoch": 0.3478666900134244,
"grad_norm": 0.0017747861566022038,
"learning_rate": 0.00029022049286640724,
"loss": 0.0001,
"step": 1490
},
{
"epoch": 0.35020136578532657,
"grad_norm": 0.0011892006732523441,
"learning_rate": 0.0002891828793774319,
"loss": 0.0004,
"step": 1500
},
{
"epoch": 0.35253604155722873,
"grad_norm": 0.0011454072082415223,
"learning_rate": 0.00028814526588845656,
"loss": 0.0,
"step": 1510
},
{
"epoch": 0.3548707173291309,
"grad_norm": 0.0009849355556070805,
"learning_rate": 0.0002871076523994812,
"loss": 0.0,
"step": 1520
},
{
"epoch": 0.3572053931010331,
"grad_norm": 0.0009979312308132648,
"learning_rate": 0.00028607003891050587,
"loss": 0.0,
"step": 1530
},
{
"epoch": 0.35954006887293527,
"grad_norm": 0.6208717226982117,
"learning_rate": 0.00028503242542153053,
"loss": 0.0012,
"step": 1540
},
{
"epoch": 0.36187474464483743,
"grad_norm": 1.135374665260315,
"learning_rate": 0.00028399481193255513,
"loss": 0.0008,
"step": 1550
},
{
"epoch": 0.36420942041673965,
"grad_norm": 0.0006218306953087449,
"learning_rate": 0.0002829571984435798,
"loss": 0.0001,
"step": 1560
},
{
"epoch": 0.3665440961886418,
"grad_norm": 0.002007074421271682,
"learning_rate": 0.00028191958495460444,
"loss": 0.0013,
"step": 1570
},
{
"epoch": 0.36887877196054397,
"grad_norm": 0.0011203576577827334,
"learning_rate": 0.00028088197146562905,
"loss": 0.0051,
"step": 1580
},
{
"epoch": 0.37121344773244613,
"grad_norm": 0.004436762072145939,
"learning_rate": 0.0002798443579766537,
"loss": 0.0091,
"step": 1590
},
{
"epoch": 0.37354812350434835,
"grad_norm": 0.4702003598213196,
"learning_rate": 0.00027880674448767836,
"loss": 0.0009,
"step": 1600
},
{
"epoch": 0.3758827992762505,
"grad_norm": 0.013339284807443619,
"learning_rate": 0.00027776913099870296,
"loss": 0.0093,
"step": 1610
},
{
"epoch": 0.3782174750481527,
"grad_norm": 0.0076319011859595776,
"learning_rate": 0.0002767315175097276,
"loss": 0.0025,
"step": 1620
},
{
"epoch": 0.3805521508200549,
"grad_norm": 0.005932167172431946,
"learning_rate": 0.0002756939040207523,
"loss": 0.0004,
"step": 1630
},
{
"epoch": 0.38288682659195705,
"grad_norm": 0.003560519078746438,
"learning_rate": 0.00027465629053177693,
"loss": 0.0003,
"step": 1640
},
{
"epoch": 0.3852215023638592,
"grad_norm": 0.001825852901674807,
"learning_rate": 0.0002736186770428016,
"loss": 0.0003,
"step": 1650
},
{
"epoch": 0.3875561781357614,
"grad_norm": 0.11422229558229446,
"learning_rate": 0.00027258106355382625,
"loss": 0.0006,
"step": 1660
},
{
"epoch": 0.3898908539076636,
"grad_norm": 0.00336836208589375,
"learning_rate": 0.00027154345006485085,
"loss": 0.0002,
"step": 1670
},
{
"epoch": 0.39222552967956575,
"grad_norm": 0.020830854773521423,
"learning_rate": 0.0002705058365758755,
"loss": 0.0003,
"step": 1680
},
{
"epoch": 0.3945602054514679,
"grad_norm": 0.5437701940536499,
"learning_rate": 0.00026946822308690017,
"loss": 0.0006,
"step": 1690
},
{
"epoch": 0.3968948812233701,
"grad_norm": 0.012015492655336857,
"learning_rate": 0.00026843060959792477,
"loss": 0.0001,
"step": 1700
},
{
"epoch": 0.3992295569952723,
"grad_norm": 0.0010134581243619323,
"learning_rate": 0.0002673929961089494,
"loss": 0.0005,
"step": 1710
},
{
"epoch": 0.40156423276717446,
"grad_norm": 0.001305864891037345,
"learning_rate": 0.0002663553826199741,
"loss": 0.0006,
"step": 1720
},
{
"epoch": 0.4038989085390766,
"grad_norm": 0.0030054424423724413,
"learning_rate": 0.0002653177691309987,
"loss": 0.0,
"step": 1730
},
{
"epoch": 0.40623358431097883,
"grad_norm": 0.0008168119820766151,
"learning_rate": 0.00026428015564202334,
"loss": 0.005,
"step": 1740
},
{
"epoch": 0.408568260082881,
"grad_norm": 0.0005433742771856487,
"learning_rate": 0.000263242542153048,
"loss": 0.0022,
"step": 1750
},
{
"epoch": 0.41090293585478316,
"grad_norm": 0.03222297504544258,
"learning_rate": 0.00026220492866407266,
"loss": 0.0001,
"step": 1760
},
{
"epoch": 0.4132376116266853,
"grad_norm": 0.001766858738847077,
"learning_rate": 0.0002611673151750973,
"loss": 0.0001,
"step": 1770
},
{
"epoch": 0.41557228739858754,
"grad_norm": 0.00046385781024582684,
"learning_rate": 0.0002601297016861219,
"loss": 0.0,
"step": 1780
},
{
"epoch": 0.4179069631704897,
"grad_norm": 0.00034939011675305665,
"learning_rate": 0.00025909208819714657,
"loss": 0.0,
"step": 1790
},
{
"epoch": 0.42024163894239186,
"grad_norm": 0.0006307600415311754,
"learning_rate": 0.00025805447470817123,
"loss": 0.0,
"step": 1800
},
{
"epoch": 0.4225763147142941,
"grad_norm": 0.0005922391428612173,
"learning_rate": 0.0002570168612191959,
"loss": 0.0,
"step": 1810
},
{
"epoch": 0.42491099048619624,
"grad_norm": 0.00035804559593088925,
"learning_rate": 0.0002559792477302205,
"loss": 0.0,
"step": 1820
},
{
"epoch": 0.4272456662580984,
"grad_norm": 0.002265334827825427,
"learning_rate": 0.00025494163424124515,
"loss": 0.0,
"step": 1830
},
{
"epoch": 0.42958034203000056,
"grad_norm": 0.00030151245300658047,
"learning_rate": 0.0002539040207522698,
"loss": 0.0001,
"step": 1840
},
{
"epoch": 0.4319150178019028,
"grad_norm": 0.0007045480306260288,
"learning_rate": 0.0002528664072632944,
"loss": 0.0,
"step": 1850
},
{
"epoch": 0.43424969357380494,
"grad_norm": 0.0061547341756522655,
"learning_rate": 0.00025182879377431906,
"loss": 0.0,
"step": 1860
},
{
"epoch": 0.4365843693457071,
"grad_norm": 0.0003125610819552094,
"learning_rate": 0.0002507911802853437,
"loss": 0.0,
"step": 1870
},
{
"epoch": 0.4389190451176093,
"grad_norm": 0.0002632684481795877,
"learning_rate": 0.0002497535667963684,
"loss": 0.0,
"step": 1880
},
{
"epoch": 0.4412537208895115,
"grad_norm": 0.0003021568991243839,
"learning_rate": 0.00024871595330739303,
"loss": 0.0,
"step": 1890
},
{
"epoch": 0.44358839666141364,
"grad_norm": 0.0002902685955632478,
"learning_rate": 0.00024767833981841764,
"loss": 0.0,
"step": 1900
},
{
"epoch": 0.4459230724333158,
"grad_norm": 0.000261983135715127,
"learning_rate": 0.0002466407263294423,
"loss": 0.0001,
"step": 1910
},
{
"epoch": 0.448257748205218,
"grad_norm": 0.00039503935840912163,
"learning_rate": 0.00024560311284046695,
"loss": 0.0,
"step": 1920
},
{
"epoch": 0.4505924239771202,
"grad_norm": 0.00022091201390139759,
"learning_rate": 0.0002445654993514916,
"loss": 0.0,
"step": 1930
},
{
"epoch": 0.45292709974902234,
"grad_norm": 2.493230104446411,
"learning_rate": 0.0002435278858625162,
"loss": 0.0371,
"step": 1940
},
{
"epoch": 0.4552617755209245,
"grad_norm": 0.07945345342159271,
"learning_rate": 0.00024249027237354084,
"loss": 0.0034,
"step": 1950
},
{
"epoch": 0.4575964512928267,
"grad_norm": 0.0050026909448206425,
"learning_rate": 0.0002414526588845655,
"loss": 0.0043,
"step": 1960
},
{
"epoch": 0.4599311270647289,
"grad_norm": 0.006855088286101818,
"learning_rate": 0.00024041504539559015,
"loss": 0.0065,
"step": 1970
},
{
"epoch": 0.46226580283663105,
"grad_norm": 0.007564285770058632,
"learning_rate": 0.00023937743190661478,
"loss": 0.025,
"step": 1980
},
{
"epoch": 0.46460047860853326,
"grad_norm": 0.13361288607120514,
"learning_rate": 0.00023833981841763944,
"loss": 0.0058,
"step": 1990
},
{
"epoch": 0.4669351543804354,
"grad_norm": 0.011933702044188976,
"learning_rate": 0.0002373022049286641,
"loss": 0.0034,
"step": 2000
},
{
"epoch": 0.4692698301523376,
"grad_norm": 0.0047375899739563465,
"learning_rate": 0.00023626459143968873,
"loss": 0.0051,
"step": 2010
},
{
"epoch": 0.47160450592423975,
"grad_norm": 0.012455107644200325,
"learning_rate": 0.00023522697795071338,
"loss": 0.0001,
"step": 2020
},
{
"epoch": 0.47393918169614196,
"grad_norm": 0.003286924911662936,
"learning_rate": 0.00023418936446173801,
"loss": 0.0026,
"step": 2030
},
{
"epoch": 0.4762738574680441,
"grad_norm": 0.004305190406739712,
"learning_rate": 0.00023315175097276267,
"loss": 0.0034,
"step": 2040
},
{
"epoch": 0.4786085332399463,
"grad_norm": 0.003077031811699271,
"learning_rate": 0.00023211413748378733,
"loss": 0.0007,
"step": 2050
},
{
"epoch": 0.4809432090118485,
"grad_norm": 0.003154961857944727,
"learning_rate": 0.00023107652399481193,
"loss": 0.0003,
"step": 2060
},
{
"epoch": 0.48327788478375067,
"grad_norm": 0.0026587171014398336,
"learning_rate": 0.00023003891050583656,
"loss": 0.0001,
"step": 2070
},
{
"epoch": 0.48561256055565283,
"grad_norm": 0.018204033374786377,
"learning_rate": 0.00022900129701686122,
"loss": 0.0002,
"step": 2080
},
{
"epoch": 0.487947236327555,
"grad_norm": 0.013659532181918621,
"learning_rate": 0.00022796368352788588,
"loss": 0.0001,
"step": 2090
},
{
"epoch": 0.4902819120994572,
"grad_norm": 0.0016216342337429523,
"learning_rate": 0.0002269260700389105,
"loss": 0.0092,
"step": 2100
},
{
"epoch": 0.49261658787135937,
"grad_norm": 0.13890917599201202,
"learning_rate": 0.00022588845654993516,
"loss": 0.0006,
"step": 2110
},
{
"epoch": 0.49495126364326153,
"grad_norm": 0.004591196309775114,
"learning_rate": 0.0002248508430609598,
"loss": 0.0014,
"step": 2120
},
{
"epoch": 0.49728593941516375,
"grad_norm": 0.0019425478531047702,
"learning_rate": 0.00022381322957198445,
"loss": 0.0001,
"step": 2130
},
{
"epoch": 0.4996206151870659,
"grad_norm": 0.016683168709278107,
"learning_rate": 0.0002227756160830091,
"loss": 0.0042,
"step": 2140
},
{
"epoch": 0.5019552909589681,
"grad_norm": 0.0052938396111130714,
"learning_rate": 0.00022173800259403374,
"loss": 0.0005,
"step": 2150
},
{
"epoch": 0.5042899667308702,
"grad_norm": 0.0712481215596199,
"learning_rate": 0.0002207003891050584,
"loss": 0.0003,
"step": 2160
},
{
"epoch": 0.5066246425027724,
"grad_norm": 0.002925017150118947,
"learning_rate": 0.00021966277561608305,
"loss": 0.005,
"step": 2170
},
{
"epoch": 0.5089593182746746,
"grad_norm": 0.06688928604125977,
"learning_rate": 0.00021862516212710765,
"loss": 0.0108,
"step": 2180
},
{
"epoch": 0.5112939940465768,
"grad_norm": 0.007370030973106623,
"learning_rate": 0.00021758754863813228,
"loss": 0.0005,
"step": 2190
},
{
"epoch": 0.513628669818479,
"grad_norm": 0.006824078969657421,
"learning_rate": 0.00021654993514915694,
"loss": 0.0002,
"step": 2200
},
{
"epoch": 0.5159633455903812,
"grad_norm": 0.004564494825899601,
"learning_rate": 0.00021551232166018157,
"loss": 0.0001,
"step": 2210
},
{
"epoch": 0.5182980213622833,
"grad_norm": 0.0024187033995985985,
"learning_rate": 0.00021447470817120623,
"loss": 0.0003,
"step": 2220
},
{
"epoch": 0.5206326971341855,
"grad_norm": 0.0019623206462711096,
"learning_rate": 0.00021343709468223088,
"loss": 0.0004,
"step": 2230
},
{
"epoch": 0.5229673729060876,
"grad_norm": 0.006479791831225157,
"learning_rate": 0.0002123994811932555,
"loss": 0.0009,
"step": 2240
},
{
"epoch": 0.5253020486779898,
"grad_norm": 0.001472059520892799,
"learning_rate": 0.00021136186770428017,
"loss": 0.0022,
"step": 2250
},
{
"epoch": 0.5276367244498921,
"grad_norm": 0.0011262299958616495,
"learning_rate": 0.00021032425421530483,
"loss": 0.0,
"step": 2260
},
{
"epoch": 0.5299714002217942,
"grad_norm": 0.0010004049399867654,
"learning_rate": 0.00020928664072632946,
"loss": 0.0034,
"step": 2270
},
{
"epoch": 0.5323060759936964,
"grad_norm": 0.010012038052082062,
"learning_rate": 0.00020824902723735411,
"loss": 0.0003,
"step": 2280
},
{
"epoch": 0.5346407517655986,
"grad_norm": 0.002231718273833394,
"learning_rate": 0.00020721141374837877,
"loss": 0.0003,
"step": 2290
},
{
"epoch": 0.5369754275375007,
"grad_norm": 0.0018004688899964094,
"learning_rate": 0.00020617380025940337,
"loss": 0.0001,
"step": 2300
},
{
"epoch": 0.5393101033094029,
"grad_norm": 0.0008523733704350889,
"learning_rate": 0.000205136186770428,
"loss": 0.0001,
"step": 2310
},
{
"epoch": 0.541644779081305,
"grad_norm": 0.0011259455932304263,
"learning_rate": 0.00020409857328145266,
"loss": 0.0,
"step": 2320
},
{
"epoch": 0.5439794548532073,
"grad_norm": 0.0006843574810773134,
"learning_rate": 0.0002030609597924773,
"loss": 0.0,
"step": 2330
},
{
"epoch": 0.5463141306251095,
"grad_norm": 0.0007879248005338013,
"learning_rate": 0.00020202334630350195,
"loss": 0.0,
"step": 2340
},
{
"epoch": 0.5486488063970116,
"grad_norm": 0.0011782748624682426,
"learning_rate": 0.0002009857328145266,
"loss": 0.0001,
"step": 2350
},
{
"epoch": 0.5509834821689138,
"grad_norm": 0.004246284253895283,
"learning_rate": 0.00019994811932555123,
"loss": 0.0163,
"step": 2360
},
{
"epoch": 0.553318157940816,
"grad_norm": 0.7899481058120728,
"learning_rate": 0.0001989105058365759,
"loss": 0.0054,
"step": 2370
},
{
"epoch": 0.5556528337127181,
"grad_norm": 0.0048600370064377785,
"learning_rate": 0.00019787289234760055,
"loss": 0.0001,
"step": 2380
},
{
"epoch": 0.5579875094846203,
"grad_norm": 0.2455766797065735,
"learning_rate": 0.00019683527885862515,
"loss": 0.0009,
"step": 2390
},
{
"epoch": 0.5603221852565226,
"grad_norm": 0.004527187906205654,
"learning_rate": 0.0001957976653696498,
"loss": 0.0005,
"step": 2400
},
{
"epoch": 0.5626568610284247,
"grad_norm": 0.003127218456938863,
"learning_rate": 0.00019476005188067446,
"loss": 0.0001,
"step": 2410
},
{
"epoch": 0.5649915368003269,
"grad_norm": 0.0033744657412171364,
"learning_rate": 0.0001937224383916991,
"loss": 0.0008,
"step": 2420
},
{
"epoch": 0.567326212572229,
"grad_norm": 0.0021291917655617,
"learning_rate": 0.00019268482490272375,
"loss": 0.0001,
"step": 2430
},
{
"epoch": 0.5696608883441312,
"grad_norm": 0.002303266664966941,
"learning_rate": 0.0001916472114137484,
"loss": 0.0001,
"step": 2440
},
{
"epoch": 0.5719955641160334,
"grad_norm": 0.5721760392189026,
"learning_rate": 0.000190609597924773,
"loss": 0.011,
"step": 2450
},
{
"epoch": 0.5743302398879355,
"grad_norm": 1.1442689895629883,
"learning_rate": 0.00018957198443579767,
"loss": 0.0065,
"step": 2460
},
{
"epoch": 0.5766649156598377,
"grad_norm": 0.03165394440293312,
"learning_rate": 0.00018853437094682233,
"loss": 0.001,
"step": 2470
},
{
"epoch": 0.57899959143174,
"grad_norm": 0.007602803409099579,
"learning_rate": 0.00018749675745784696,
"loss": 0.0132,
"step": 2480
},
{
"epoch": 0.5813342672036421,
"grad_norm": 0.026837633922696114,
"learning_rate": 0.0001864591439688716,
"loss": 0.001,
"step": 2490
},
{
"epoch": 0.5836689429755443,
"grad_norm": 0.024656491354107857,
"learning_rate": 0.00018542153047989624,
"loss": 0.0018,
"step": 2500
},
{
"epoch": 0.5860036187474464,
"grad_norm": 0.011152198538184166,
"learning_rate": 0.0001843839169909209,
"loss": 0.0004,
"step": 2510
},
{
"epoch": 0.5883382945193486,
"grad_norm": 0.010260018520057201,
"learning_rate": 0.00018334630350194553,
"loss": 0.0002,
"step": 2520
},
{
"epoch": 0.5906729702912508,
"grad_norm": 0.003675712738186121,
"learning_rate": 0.00018230869001297019,
"loss": 0.0025,
"step": 2530
},
{
"epoch": 0.5930076460631529,
"grad_norm": 0.00440176110714674,
"learning_rate": 0.00018127107652399482,
"loss": 0.0001,
"step": 2540
},
{
"epoch": 0.5953423218350552,
"grad_norm": 0.004901622422039509,
"learning_rate": 0.00018023346303501947,
"loss": 0.0019,
"step": 2550
},
{
"epoch": 0.5976769976069574,
"grad_norm": 0.0028110845014452934,
"learning_rate": 0.0001791958495460441,
"loss": 0.0009,
"step": 2560
},
{
"epoch": 0.6000116733788595,
"grad_norm": 0.0025059175677597523,
"learning_rate": 0.00017815823605706876,
"loss": 0.0,
"step": 2570
},
{
"epoch": 0.6023463491507617,
"grad_norm": 0.0019852565601468086,
"learning_rate": 0.0001771206225680934,
"loss": 0.0,
"step": 2580
},
{
"epoch": 0.6046810249226638,
"grad_norm": 0.0014332541031762958,
"learning_rate": 0.00017608300907911802,
"loss": 0.0004,
"step": 2590
},
{
"epoch": 0.607015700694566,
"grad_norm": 0.32902491092681885,
"learning_rate": 0.00017504539559014268,
"loss": 0.0005,
"step": 2600
},
{
"epoch": 0.6093503764664682,
"grad_norm": 0.0036561412271112204,
"learning_rate": 0.00017400778210116733,
"loss": 0.0011,
"step": 2610
},
{
"epoch": 0.6116850522383704,
"grad_norm": 0.06626530736684799,
"learning_rate": 0.00017297016861219196,
"loss": 0.0001,
"step": 2620
},
{
"epoch": 0.6140197280102726,
"grad_norm": 0.0011569494381546974,
"learning_rate": 0.00017193255512321662,
"loss": 0.0041,
"step": 2630
},
{
"epoch": 0.6163544037821748,
"grad_norm": 0.002706947736442089,
"learning_rate": 0.00017089494163424125,
"loss": 0.0111,
"step": 2640
},
{
"epoch": 0.6186890795540769,
"grad_norm": 0.0033384524285793304,
"learning_rate": 0.00016985732814526588,
"loss": 0.0003,
"step": 2650
},
{
"epoch": 0.6210237553259791,
"grad_norm": 0.0037459495943039656,
"learning_rate": 0.00016881971465629054,
"loss": 0.0001,
"step": 2660
},
{
"epoch": 0.6233584310978812,
"grad_norm": 0.0036509244237095118,
"learning_rate": 0.0001677821011673152,
"loss": 0.0001,
"step": 2670
},
{
"epoch": 0.6256931068697834,
"grad_norm": 0.004108482040464878,
"learning_rate": 0.00016674448767833982,
"loss": 0.0002,
"step": 2680
},
{
"epoch": 0.6280277826416857,
"grad_norm": 0.003080847905948758,
"learning_rate": 0.00016570687418936448,
"loss": 0.0001,
"step": 2690
},
{
"epoch": 0.6303624584135878,
"grad_norm": 0.0028391852974891663,
"learning_rate": 0.0001646692607003891,
"loss": 0.0002,
"step": 2700
},
{
"epoch": 0.63269713418549,
"grad_norm": 0.001830106251873076,
"learning_rate": 0.00016363164721141374,
"loss": 0.0004,
"step": 2710
},
{
"epoch": 0.6350318099573922,
"grad_norm": 0.0024860044941306114,
"learning_rate": 0.0001625940337224384,
"loss": 0.0001,
"step": 2720
},
{
"epoch": 0.6373664857292943,
"grad_norm": 0.0016177381621673703,
"learning_rate": 0.00016155642023346305,
"loss": 0.0003,
"step": 2730
},
{
"epoch": 0.6397011615011965,
"grad_norm": 0.0024260838981717825,
"learning_rate": 0.00016051880674448768,
"loss": 0.0004,
"step": 2740
},
{
"epoch": 0.6420358372730987,
"grad_norm": 0.038342151790857315,
"learning_rate": 0.00015948119325551234,
"loss": 0.0003,
"step": 2750
},
{
"epoch": 0.6443705130450009,
"grad_norm": 0.0013671324122697115,
"learning_rate": 0.00015844357976653697,
"loss": 0.0055,
"step": 2760
},
{
"epoch": 0.6467051888169031,
"grad_norm": 0.0012879414716735482,
"learning_rate": 0.0001574059662775616,
"loss": 0.0,
"step": 2770
},
{
"epoch": 0.6490398645888052,
"grad_norm": 0.0014536501839756966,
"learning_rate": 0.00015636835278858626,
"loss": 0.0,
"step": 2780
},
{
"epoch": 0.6513745403607074,
"grad_norm": 0.8653482794761658,
"learning_rate": 0.00015533073929961092,
"loss": 0.002,
"step": 2790
},
{
"epoch": 0.6537092161326096,
"grad_norm": 0.0016417702427133918,
"learning_rate": 0.00015429312581063555,
"loss": 0.0003,
"step": 2800
},
{
"epoch": 0.6560438919045117,
"grad_norm": 0.07089340686798096,
"learning_rate": 0.0001532555123216602,
"loss": 0.0131,
"step": 2810
},
{
"epoch": 0.6583785676764139,
"grad_norm": 0.0033747514244168997,
"learning_rate": 0.00015221789883268483,
"loss": 0.0028,
"step": 2820
},
{
"epoch": 0.6607132434483162,
"grad_norm": 0.006389171350747347,
"learning_rate": 0.00015118028534370946,
"loss": 0.0002,
"step": 2830
},
{
"epoch": 0.6630479192202183,
"grad_norm": 0.0026964943390339613,
"learning_rate": 0.00015014267185473412,
"loss": 0.0002,
"step": 2840
},
{
"epoch": 0.6653825949921205,
"grad_norm": 0.002625943860039115,
"learning_rate": 0.00014910505836575878,
"loss": 0.0004,
"step": 2850
},
{
"epoch": 0.6677172707640227,
"grad_norm": 0.003076399676501751,
"learning_rate": 0.0001480674448767834,
"loss": 0.0001,
"step": 2860
},
{
"epoch": 0.6700519465359248,
"grad_norm": 0.004514554515480995,
"learning_rate": 0.00014702983138780806,
"loss": 0.0001,
"step": 2870
},
{
"epoch": 0.672386622307827,
"grad_norm": 0.0014996561221778393,
"learning_rate": 0.0001459922178988327,
"loss": 0.0002,
"step": 2880
},
{
"epoch": 0.6747212980797291,
"grad_norm": 0.0015088297659531236,
"learning_rate": 0.00014495460440985732,
"loss": 0.0003,
"step": 2890
},
{
"epoch": 0.6770559738516314,
"grad_norm": 0.0016345715848729014,
"learning_rate": 0.00014391699092088198,
"loss": 0.0,
"step": 2900
},
{
"epoch": 0.6793906496235336,
"grad_norm": 0.0027825534343719482,
"learning_rate": 0.00014287937743190664,
"loss": 0.0008,
"step": 2910
},
{
"epoch": 0.6817253253954357,
"grad_norm": 0.0012849323684349656,
"learning_rate": 0.00014184176394293127,
"loss": 0.0,
"step": 2920
},
{
"epoch": 0.6840600011673379,
"grad_norm": 0.0021583992056548595,
"learning_rate": 0.00014080415045395592,
"loss": 0.0,
"step": 2930
},
{
"epoch": 0.6863946769392401,
"grad_norm": 0.0012560015311464667,
"learning_rate": 0.00013976653696498055,
"loss": 0.0001,
"step": 2940
},
{
"epoch": 0.6887293527111422,
"grad_norm": 0.0009112095576710999,
"learning_rate": 0.00013872892347600518,
"loss": 0.0001,
"step": 2950
},
{
"epoch": 0.6910640284830444,
"grad_norm": 0.0013899313053116202,
"learning_rate": 0.00013769130998702984,
"loss": 0.0,
"step": 2960
},
{
"epoch": 0.6933987042549465,
"grad_norm": 1.1169312000274658,
"learning_rate": 0.0001366536964980545,
"loss": 0.005,
"step": 2970
},
{
"epoch": 0.6957333800268488,
"grad_norm": 0.0009174107108265162,
"learning_rate": 0.00013561608300907913,
"loss": 0.0009,
"step": 2980
},
{
"epoch": 0.698068055798751,
"grad_norm": 0.0038010200951248407,
"learning_rate": 0.00013457846952010378,
"loss": 0.0071,
"step": 2990
},
{
"epoch": 0.7004027315706531,
"grad_norm": 0.002235995838418603,
"learning_rate": 0.00013354085603112841,
"loss": 0.0002,
"step": 3000
},
{
"epoch": 0.7027374073425553,
"grad_norm": 0.039830174297094345,
"learning_rate": 0.00013250324254215304,
"loss": 0.0004,
"step": 3010
},
{
"epoch": 0.7050720831144575,
"grad_norm": 0.30222392082214355,
"learning_rate": 0.0001314656290531777,
"loss": 0.0008,
"step": 3020
},
{
"epoch": 0.7074067588863596,
"grad_norm": 0.47657474875450134,
"learning_rate": 0.00013042801556420233,
"loss": 0.0008,
"step": 3030
},
{
"epoch": 0.7097414346582618,
"grad_norm": 0.0009529945673421025,
"learning_rate": 0.000129390402075227,
"loss": 0.0,
"step": 3040
},
{
"epoch": 0.712076110430164,
"grad_norm": 0.00109247793443501,
"learning_rate": 0.00012835278858625164,
"loss": 0.0004,
"step": 3050
},
{
"epoch": 0.7144107862020662,
"grad_norm": 0.0016496065072715282,
"learning_rate": 0.00012731517509727627,
"loss": 0.0014,
"step": 3060
},
{
"epoch": 0.7167454619739684,
"grad_norm": 0.0007458662148565054,
"learning_rate": 0.0001262775616083009,
"loss": 0.0,
"step": 3070
},
{
"epoch": 0.7190801377458705,
"grad_norm": 0.0010477920295670629,
"learning_rate": 0.00012523994811932556,
"loss": 0.0,
"step": 3080
},
{
"epoch": 0.7214148135177727,
"grad_norm": 0.0039003838319331408,
"learning_rate": 0.0001242023346303502,
"loss": 0.0098,
"step": 3090
},
{
"epoch": 0.7237494892896749,
"grad_norm": 0.6328915953636169,
"learning_rate": 0.00012316472114137485,
"loss": 0.0048,
"step": 3100
},
{
"epoch": 0.726084165061577,
"grad_norm": 0.0023845217656344175,
"learning_rate": 0.0001221271076523995,
"loss": 0.0001,
"step": 3110
},
{
"epoch": 0.7284188408334793,
"grad_norm": 0.005935149732977152,
"learning_rate": 0.00012108949416342412,
"loss": 0.0003,
"step": 3120
},
{
"epoch": 0.7307535166053815,
"grad_norm": 0.002948681591078639,
"learning_rate": 0.00012005188067444876,
"loss": 0.0002,
"step": 3130
},
{
"epoch": 0.7330881923772836,
"grad_norm": 1.1137011051177979,
"learning_rate": 0.00011901426718547342,
"loss": 0.0072,
"step": 3140
},
{
"epoch": 0.7354228681491858,
"grad_norm": 0.03960300236940384,
"learning_rate": 0.00011797665369649807,
"loss": 0.0064,
"step": 3150
},
{
"epoch": 0.7377575439210879,
"grad_norm": 0.004956856369972229,
"learning_rate": 0.00011693904020752271,
"loss": 0.0048,
"step": 3160
},
{
"epoch": 0.7400922196929901,
"grad_norm": 0.0028774456586688757,
"learning_rate": 0.00011590142671854735,
"loss": 0.001,
"step": 3170
},
{
"epoch": 0.7424268954648923,
"grad_norm": 0.05007918179035187,
"learning_rate": 0.00011486381322957198,
"loss": 0.0014,
"step": 3180
},
{
"epoch": 0.7447615712367945,
"grad_norm": 0.004933805204927921,
"learning_rate": 0.00011382619974059663,
"loss": 0.0001,
"step": 3190
},
{
"epoch": 0.7470962470086967,
"grad_norm": 0.0028584490064531565,
"learning_rate": 0.00011278858625162127,
"loss": 0.011,
"step": 3200
},
{
"epoch": 0.7494309227805989,
"grad_norm": 0.015388348139822483,
"learning_rate": 0.00011175097276264593,
"loss": 0.0004,
"step": 3210
},
{
"epoch": 0.751765598552501,
"grad_norm": 0.04148218780755997,
"learning_rate": 0.00011071335927367057,
"loss": 0.0015,
"step": 3220
},
{
"epoch": 0.7541002743244032,
"grad_norm": 0.6182008981704712,
"learning_rate": 0.00010967574578469521,
"loss": 0.0036,
"step": 3230
},
{
"epoch": 0.7564349500963053,
"grad_norm": 0.0034724888391792774,
"learning_rate": 0.00010863813229571984,
"loss": 0.0002,
"step": 3240
},
{
"epoch": 0.7587696258682075,
"grad_norm": 0.019181331619620323,
"learning_rate": 0.00010760051880674449,
"loss": 0.0002,
"step": 3250
},
{
"epoch": 0.7611043016401098,
"grad_norm": 0.007054260466247797,
"learning_rate": 0.00010656290531776913,
"loss": 0.0004,
"step": 3260
},
{
"epoch": 0.7634389774120119,
"grad_norm": 0.0026721367612481117,
"learning_rate": 0.00010552529182879379,
"loss": 0.0001,
"step": 3270
},
{
"epoch": 0.7657736531839141,
"grad_norm": 0.001824371051043272,
"learning_rate": 0.00010448767833981843,
"loss": 0.0002,
"step": 3280
},
{
"epoch": 0.7681083289558163,
"grad_norm": 0.2344302535057068,
"learning_rate": 0.00010345006485084307,
"loss": 0.0004,
"step": 3290
},
{
"epoch": 0.7704430047277184,
"grad_norm": 0.0015703398967161775,
"learning_rate": 0.0001024124513618677,
"loss": 0.0,
"step": 3300
},
{
"epoch": 0.7727776804996206,
"grad_norm": 0.0013199965469539165,
"learning_rate": 0.00010137483787289235,
"loss": 0.0,
"step": 3310
},
{
"epoch": 0.7751123562715228,
"grad_norm": 0.001341565977782011,
"learning_rate": 0.00010033722438391699,
"loss": 0.0,
"step": 3320
},
{
"epoch": 0.777447032043425,
"grad_norm": 0.0012060283916071057,
"learning_rate": 9.929961089494165e-05,
"loss": 0.0001,
"step": 3330
},
{
"epoch": 0.7797817078153272,
"grad_norm": 0.0020445692352950573,
"learning_rate": 9.826199740596628e-05,
"loss": 0.0,
"step": 3340
},
{
"epoch": 0.7821163835872293,
"grad_norm": 0.0010797139257192612,
"learning_rate": 9.722438391699092e-05,
"loss": 0.0,
"step": 3350
},
{
"epoch": 0.7844510593591315,
"grad_norm": 0.0013513348530977964,
"learning_rate": 9.618677042801558e-05,
"loss": 0.0,
"step": 3360
},
{
"epoch": 0.7867857351310337,
"grad_norm": 0.0008800049545243382,
"learning_rate": 9.514915693904021e-05,
"loss": 0.0,
"step": 3370
},
{
"epoch": 0.7891204109029358,
"grad_norm": 0.001039789873175323,
"learning_rate": 9.411154345006485e-05,
"loss": 0.0,
"step": 3380
},
{
"epoch": 0.791455086674838,
"grad_norm": 0.0011056034127250314,
"learning_rate": 9.307392996108951e-05,
"loss": 0.0,
"step": 3390
},
{
"epoch": 0.7937897624467402,
"grad_norm": 0.00087336590513587,
"learning_rate": 9.203631647211414e-05,
"loss": 0.0005,
"step": 3400
},
{
"epoch": 0.7961244382186424,
"grad_norm": 0.0016204583225771785,
"learning_rate": 9.099870298313878e-05,
"loss": 0.0,
"step": 3410
},
{
"epoch": 0.7984591139905446,
"grad_norm": 0.0010950096184387803,
"learning_rate": 8.996108949416342e-05,
"loss": 0.0,
"step": 3420
},
{
"epoch": 0.8007937897624468,
"grad_norm": 0.0011948348255828023,
"learning_rate": 8.892347600518807e-05,
"loss": 0.0037,
"step": 3430
},
{
"epoch": 0.8031284655343489,
"grad_norm": 0.009840068407356739,
"learning_rate": 8.788586251621271e-05,
"loss": 0.0001,
"step": 3440
},
{
"epoch": 0.8054631413062511,
"grad_norm": 0.0017067514127120376,
"learning_rate": 8.684824902723735e-05,
"loss": 0.0001,
"step": 3450
},
{
"epoch": 0.8077978170781532,
"grad_norm": 0.0011140963761135936,
"learning_rate": 8.5810635538262e-05,
"loss": 0.004,
"step": 3460
},
{
"epoch": 0.8101324928500554,
"grad_norm": 0.7195191979408264,
"learning_rate": 8.477302204928664e-05,
"loss": 0.0035,
"step": 3470
},
{
"epoch": 0.8124671686219577,
"grad_norm": 0.0012634329032152891,
"learning_rate": 8.373540856031128e-05,
"loss": 0.0001,
"step": 3480
},
{
"epoch": 0.8148018443938598,
"grad_norm": 0.0016726938774809241,
"learning_rate": 8.269779507133593e-05,
"loss": 0.0006,
"step": 3490
},
{
"epoch": 0.817136520165762,
"grad_norm": 0.0019955493044108152,
"learning_rate": 8.166018158236057e-05,
"loss": 0.0006,
"step": 3500
},
{
"epoch": 0.8194711959376642,
"grad_norm": 0.0008943151333369315,
"learning_rate": 8.062256809338522e-05,
"loss": 0.0006,
"step": 3510
},
{
"epoch": 0.8218058717095663,
"grad_norm": 0.0013045528903603554,
"learning_rate": 7.958495460440986e-05,
"loss": 0.0002,
"step": 3520
},
{
"epoch": 0.8241405474814685,
"grad_norm": 0.0010028982069343328,
"learning_rate": 7.85473411154345e-05,
"loss": 0.0,
"step": 3530
},
{
"epoch": 0.8264752232533706,
"grad_norm": 0.0007102734525687993,
"learning_rate": 7.750972762645915e-05,
"loss": 0.0001,
"step": 3540
},
{
"epoch": 0.8288098990252729,
"grad_norm": 0.0014275240246206522,
"learning_rate": 7.647211413748379e-05,
"loss": 0.0,
"step": 3550
},
{
"epoch": 0.8311445747971751,
"grad_norm": 0.0009326430154033005,
"learning_rate": 7.543450064850843e-05,
"loss": 0.0,
"step": 3560
},
{
"epoch": 0.8334792505690772,
"grad_norm": 0.0008573593804612756,
"learning_rate": 7.439688715953308e-05,
"loss": 0.0001,
"step": 3570
},
{
"epoch": 0.8358139263409794,
"grad_norm": 0.0015563720371574163,
"learning_rate": 7.335927367055772e-05,
"loss": 0.0002,
"step": 3580
},
{
"epoch": 0.8381486021128816,
"grad_norm": 0.0008948877803049982,
"learning_rate": 7.232166018158236e-05,
"loss": 0.0,
"step": 3590
},
{
"epoch": 0.8404832778847837,
"grad_norm": 0.0015601961640641093,
"learning_rate": 7.1284046692607e-05,
"loss": 0.0,
"step": 3600
},
{
"epoch": 0.8428179536566859,
"grad_norm": 0.0013114233734086156,
"learning_rate": 7.024643320363165e-05,
"loss": 0.0073,
"step": 3610
},
{
"epoch": 0.8451526294285882,
"grad_norm": 0.00176639249548316,
"learning_rate": 6.920881971465629e-05,
"loss": 0.0037,
"step": 3620
},
{
"epoch": 0.8474873052004903,
"grad_norm": 0.002710576867684722,
"learning_rate": 6.817120622568094e-05,
"loss": 0.0001,
"step": 3630
},
{
"epoch": 0.8498219809723925,
"grad_norm": 0.13074593245983124,
"learning_rate": 6.713359273670558e-05,
"loss": 0.001,
"step": 3640
},
{
"epoch": 0.8521566567442946,
"grad_norm": 0.002523267176002264,
"learning_rate": 6.609597924773022e-05,
"loss": 0.0001,
"step": 3650
},
{
"epoch": 0.8544913325161968,
"grad_norm": 0.002858164021745324,
"learning_rate": 6.505836575875487e-05,
"loss": 0.0003,
"step": 3660
},
{
"epoch": 0.856826008288099,
"grad_norm": 0.002222646027803421,
"learning_rate": 6.402075226977951e-05,
"loss": 0.0006,
"step": 3670
},
{
"epoch": 0.8591606840600011,
"grad_norm": 0.03722568228840828,
"learning_rate": 6.298313878080415e-05,
"loss": 0.0001,
"step": 3680
},
{
"epoch": 0.8614953598319034,
"grad_norm": 0.0012012380175292492,
"learning_rate": 6.19455252918288e-05,
"loss": 0.0,
"step": 3690
},
{
"epoch": 0.8638300356038056,
"grad_norm": 0.0019116230541840196,
"learning_rate": 6.0907911802853433e-05,
"loss": 0.0,
"step": 3700
},
{
"epoch": 0.8661647113757077,
"grad_norm": 0.0011818850180134177,
"learning_rate": 5.9870298313878084e-05,
"loss": 0.0001,
"step": 3710
},
{
"epoch": 0.8684993871476099,
"grad_norm": 0.0008876454085111618,
"learning_rate": 5.883268482490273e-05,
"loss": 0.0001,
"step": 3720
},
{
"epoch": 0.870834062919512,
"grad_norm": 0.0011559055419638753,
"learning_rate": 5.7795071335927364e-05,
"loss": 0.0013,
"step": 3730
},
{
"epoch": 0.8731687386914142,
"grad_norm": 0.0008210024680010974,
"learning_rate": 5.6757457846952014e-05,
"loss": 0.0001,
"step": 3740
},
{
"epoch": 0.8755034144633164,
"grad_norm": 0.0019268837058916688,
"learning_rate": 5.571984435797666e-05,
"loss": 0.0001,
"step": 3750
},
{
"epoch": 0.8778380902352186,
"grad_norm": 0.40103089809417725,
"learning_rate": 5.4682230869001294e-05,
"loss": 0.0006,
"step": 3760
},
{
"epoch": 0.8801727660071208,
"grad_norm": 0.002693564398214221,
"learning_rate": 5.3644617380025944e-05,
"loss": 0.002,
"step": 3770
},
{
"epoch": 0.882507441779023,
"grad_norm": 0.11337973922491074,
"learning_rate": 5.260700389105059e-05,
"loss": 0.0002,
"step": 3780
},
{
"epoch": 0.8848421175509251,
"grad_norm": 0.000948163156863302,
"learning_rate": 5.156939040207524e-05,
"loss": 0.01,
"step": 3790
},
{
"epoch": 0.8871767933228273,
"grad_norm": 0.0012289845617488027,
"learning_rate": 5.053177691309987e-05,
"loss": 0.0051,
"step": 3800
},
{
"epoch": 0.8895114690947294,
"grad_norm": 1.0166712999343872,
"learning_rate": 4.949416342412452e-05,
"loss": 0.0009,
"step": 3810
},
{
"epoch": 0.8918461448666316,
"grad_norm": 0.0020474784541875124,
"learning_rate": 4.845654993514916e-05,
"loss": 0.0003,
"step": 3820
},
{
"epoch": 0.8941808206385338,
"grad_norm": 0.0022713476791977882,
"learning_rate": 4.74189364461738e-05,
"loss": 0.0001,
"step": 3830
},
{
"epoch": 0.896515496410436,
"grad_norm": 0.004341310355812311,
"learning_rate": 4.638132295719845e-05,
"loss": 0.0,
"step": 3840
},
{
"epoch": 0.8988501721823382,
"grad_norm": 0.0015770102618262172,
"learning_rate": 4.534370946822309e-05,
"loss": 0.0001,
"step": 3850
},
{
"epoch": 0.9011848479542404,
"grad_norm": 0.012255331501364708,
"learning_rate": 4.430609597924773e-05,
"loss": 0.0001,
"step": 3860
},
{
"epoch": 0.9035195237261425,
"grad_norm": 0.02712065726518631,
"learning_rate": 4.326848249027238e-05,
"loss": 0.0001,
"step": 3870
},
{
"epoch": 0.9058541994980447,
"grad_norm": 0.001247554668225348,
"learning_rate": 4.223086900129702e-05,
"loss": 0.0,
"step": 3880
},
{
"epoch": 0.9081888752699468,
"grad_norm": 0.0011685139033943415,
"learning_rate": 4.119325551232166e-05,
"loss": 0.0,
"step": 3890
},
{
"epoch": 0.910523551041849,
"grad_norm": 0.0018253360176458955,
"learning_rate": 4.015564202334631e-05,
"loss": 0.0,
"step": 3900
},
{
"epoch": 0.9128582268137513,
"grad_norm": 0.0008875974453985691,
"learning_rate": 3.9118028534370945e-05,
"loss": 0.0,
"step": 3910
},
{
"epoch": 0.9151929025856534,
"grad_norm": 0.0011628433130681515,
"learning_rate": 3.808041504539559e-05,
"loss": 0.0001,
"step": 3920
},
{
"epoch": 0.9175275783575556,
"grad_norm": 0.001058564055711031,
"learning_rate": 3.704280155642024e-05,
"loss": 0.0,
"step": 3930
},
{
"epoch": 0.9198622541294578,
"grad_norm": 0.0010234726360067725,
"learning_rate": 3.6005188067444876e-05,
"loss": 0.0,
"step": 3940
},
{
"epoch": 0.9221969299013599,
"grad_norm": 0.0009812922216951847,
"learning_rate": 3.496757457846952e-05,
"loss": 0.0004,
"step": 3950
},
{
"epoch": 0.9245316056732621,
"grad_norm": 0.0009394401567988098,
"learning_rate": 3.392996108949417e-05,
"loss": 0.0,
"step": 3960
},
{
"epoch": 0.9268662814451643,
"grad_norm": 0.0009512811666354537,
"learning_rate": 3.2892347600518806e-05,
"loss": 0.0001,
"step": 3970
},
{
"epoch": 0.9292009572170665,
"grad_norm": 0.0007261955761350691,
"learning_rate": 3.185473411154345e-05,
"loss": 0.0,
"step": 3980
},
{
"epoch": 0.9315356329889687,
"grad_norm": 0.0010610457975417376,
"learning_rate": 3.08171206225681e-05,
"loss": 0.0,
"step": 3990
},
{
"epoch": 0.9338703087608708,
"grad_norm": 0.0012513543479144573,
"learning_rate": 2.9779507133592736e-05,
"loss": 0.0,
"step": 4000
},
{
"epoch": 0.936204984532773,
"grad_norm": 0.0014717354206368327,
"learning_rate": 2.874189364461738e-05,
"loss": 0.0011,
"step": 4010
},
{
"epoch": 0.9385396603046752,
"grad_norm": 0.0008392130257561803,
"learning_rate": 2.7704280155642027e-05,
"loss": 0.0,
"step": 4020
},
{
"epoch": 0.9408743360765773,
"grad_norm": 0.0015690367436036468,
"learning_rate": 2.6666666666666667e-05,
"loss": 0.0,
"step": 4030
},
{
"epoch": 0.9432090118484795,
"grad_norm": 0.0011248665396124125,
"learning_rate": 2.562905317769131e-05,
"loss": 0.002,
"step": 4040
},
{
"epoch": 0.9455436876203818,
"grad_norm": 0.0010001506889238954,
"learning_rate": 2.4591439688715953e-05,
"loss": 0.0,
"step": 4050
},
{
"epoch": 0.9478783633922839,
"grad_norm": 0.001105117262341082,
"learning_rate": 2.3553826199740597e-05,
"loss": 0.0,
"step": 4060
},
{
"epoch": 0.9502130391641861,
"grad_norm": 0.010793734341859818,
"learning_rate": 2.251621271076524e-05,
"loss": 0.0001,
"step": 4070
},
{
"epoch": 0.9525477149360883,
"grad_norm": 0.0010343483882024884,
"learning_rate": 2.1478599221789884e-05,
"loss": 0.0001,
"step": 4080
},
{
"epoch": 0.9548823907079904,
"grad_norm": 0.0006852949154563248,
"learning_rate": 2.0440985732814527e-05,
"loss": 0.0,
"step": 4090
},
{
"epoch": 0.9572170664798926,
"grad_norm": 0.001185077242553234,
"learning_rate": 1.940337224383917e-05,
"loss": 0.0,
"step": 4100
},
{
"epoch": 0.9595517422517947,
"grad_norm": 0.055520687252283096,
"learning_rate": 1.8365758754863814e-05,
"loss": 0.0001,
"step": 4110
},
{
"epoch": 0.961886418023697,
"grad_norm": 0.0012979560997337103,
"learning_rate": 1.7328145265888457e-05,
"loss": 0.0,
"step": 4120
},
{
"epoch": 0.9642210937955992,
"grad_norm": 0.0013245136942714453,
"learning_rate": 1.62905317769131e-05,
"loss": 0.0,
"step": 4130
},
{
"epoch": 0.9665557695675013,
"grad_norm": 0.0013994915643706918,
"learning_rate": 1.5252918287937746e-05,
"loss": 0.002,
"step": 4140
},
{
"epoch": 0.9688904453394035,
"grad_norm": 0.6272192001342773,
"learning_rate": 1.4215304798962386e-05,
"loss": 0.0013,
"step": 4150
},
{
"epoch": 0.9712251211113057,
"grad_norm": 0.0012450398644432425,
"learning_rate": 1.3177691309987031e-05,
"loss": 0.0002,
"step": 4160
},
{
"epoch": 0.9735597968832078,
"grad_norm": 0.0009830017806962132,
"learning_rate": 1.2140077821011673e-05,
"loss": 0.0001,
"step": 4170
},
{
"epoch": 0.97589447265511,
"grad_norm": 0.0007283110171556473,
"learning_rate": 1.1102464332036316e-05,
"loss": 0.0,
"step": 4180
},
{
"epoch": 0.9782291484270123,
"grad_norm": 0.0008772446890361607,
"learning_rate": 1.006485084306096e-05,
"loss": 0.0,
"step": 4190
},
{
"epoch": 0.9805638241989144,
"grad_norm": 0.0007983844261616468,
"learning_rate": 9.027237354085603e-06,
"loss": 0.0002,
"step": 4200
},
{
"epoch": 0.9828984999708166,
"grad_norm": 0.0045978049747645855,
"learning_rate": 7.989623865110247e-06,
"loss": 0.0001,
"step": 4210
},
{
"epoch": 0.9852331757427187,
"grad_norm": 0.0006691565504297614,
"learning_rate": 6.95201037613489e-06,
"loss": 0.0,
"step": 4220
},
{
"epoch": 0.9875678515146209,
"grad_norm": 0.000847226707264781,
"learning_rate": 5.9143968871595335e-06,
"loss": 0.0,
"step": 4230
},
{
"epoch": 0.9899025272865231,
"grad_norm": 0.0008708458044566214,
"learning_rate": 4.876783398184177e-06,
"loss": 0.0,
"step": 4240
},
{
"epoch": 0.9922372030584252,
"grad_norm": 0.001017833361402154,
"learning_rate": 3.83916990920882e-06,
"loss": 0.0,
"step": 4250
},
{
"epoch": 0.9945718788303275,
"grad_norm": 0.001224992680363357,
"learning_rate": 2.8015564202334633e-06,
"loss": 0.0,
"step": 4260
},
{
"epoch": 0.9969065546022297,
"grad_norm": 0.0007001100457273424,
"learning_rate": 1.7639429312581063e-06,
"loss": 0.0001,
"step": 4270
},
{
"epoch": 0.9992412303741318,
"grad_norm": 0.0008271584520116448,
"learning_rate": 7.263294422827498e-07,
"loss": 0.0001,
"step": 4280
},
{
"epoch": 1.0,
"eval_accuracy": 0.9999124458258547,
"eval_loss": 0.00038632494397461414,
"eval_runtime": 4629.6003,
"eval_samples_per_second": 14.802,
"eval_steps_per_second": 0.925,
"step": 4284
}
],
"logging_steps": 10,
"max_steps": 4284,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.44199965772096e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}