{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.03451197479245361,
"eval_steps": 500,
"global_step": 40000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 4.3139968490567015e-05,
"grad_norm": 36.896514892578125,
"learning_rate": 3.3333333333333333e-06,
"loss": 2.8457,
"step": 50
},
{
"epoch": 8.627993698113403e-05,
"grad_norm": 35.37440490722656,
"learning_rate": 6.666666666666667e-06,
"loss": 2.1361,
"step": 100
},
{
"epoch": 0.00012941990547170104,
"grad_norm": 31.632505416870117,
"learning_rate": 1e-05,
"loss": 0.777,
"step": 150
},
{
"epoch": 0.00017255987396226806,
"grad_norm": 0.4134848415851593,
"learning_rate": 1.3333333333333333e-05,
"loss": 0.4916,
"step": 200
},
{
"epoch": 0.00021569984245283508,
"grad_norm": 37.35564422607422,
"learning_rate": 1.6666666666666667e-05,
"loss": 0.381,
"step": 250
},
{
"epoch": 0.0002588398109434021,
"grad_norm": 45.536712646484375,
"learning_rate": 2e-05,
"loss": 0.2516,
"step": 300
},
{
"epoch": 0.0003019797794339691,
"grad_norm": 19.42644500732422,
"learning_rate": 1.9999136977245545e-05,
"loss": 0.2801,
"step": 350
},
{
"epoch": 0.0003451197479245361,
"grad_norm": 54.93406295776367,
"learning_rate": 1.9998273954491085e-05,
"loss": 0.1867,
"step": 400
},
{
"epoch": 0.00038825971641510314,
"grad_norm": 41.097774505615234,
"learning_rate": 1.9997410931736628e-05,
"loss": 0.1246,
"step": 450
},
{
"epoch": 0.00043139968490567016,
"grad_norm": 0.38864001631736755,
"learning_rate": 1.9996547908982168e-05,
"loss": 0.1049,
"step": 500
},
{
"epoch": 0.0004745396533962372,
"grad_norm": 0.041049182415008545,
"learning_rate": 1.999568488622771e-05,
"loss": 0.2315,
"step": 550
},
{
"epoch": 0.0005176796218868042,
"grad_norm": 0.002712072106078267,
"learning_rate": 1.9994821863473255e-05,
"loss": 0.1082,
"step": 600
},
{
"epoch": 0.0005608195903773712,
"grad_norm": 2.7014479201170616e-05,
"learning_rate": 1.9993958840718798e-05,
"loss": 0.0578,
"step": 650
},
{
"epoch": 0.0006039595588679382,
"grad_norm": 1.4746110439300537,
"learning_rate": 1.9993095817964338e-05,
"loss": 0.2376,
"step": 700
},
{
"epoch": 0.0006470995273585052,
"grad_norm": 28.263187408447266,
"learning_rate": 1.999223279520988e-05,
"loss": 0.0866,
"step": 750
},
{
"epoch": 0.0006902394958490722,
"grad_norm": 11.379521369934082,
"learning_rate": 1.999136977245542e-05,
"loss": 0.0782,
"step": 800
},
{
"epoch": 0.0007333794643396393,
"grad_norm": 0.0013383959885686636,
"learning_rate": 1.9990506749700965e-05,
"loss": 0.1511,
"step": 850
},
{
"epoch": 0.0007765194328302063,
"grad_norm": 6.122570991516113,
"learning_rate": 1.9989643726946505e-05,
"loss": 0.0885,
"step": 900
},
{
"epoch": 0.0008196594013207733,
"grad_norm": 0.15970896184444427,
"learning_rate": 1.9988780704192048e-05,
"loss": 0.1027,
"step": 950
},
{
"epoch": 0.0008627993698113403,
"grad_norm": 0.19297116994857788,
"learning_rate": 1.998791768143759e-05,
"loss": 0.07,
"step": 1000
},
{
"epoch": 0.0009059393383019073,
"grad_norm": 0.00016763704479672015,
"learning_rate": 1.998705465868313e-05,
"loss": 0.1106,
"step": 1050
},
{
"epoch": 0.0009490793067924744,
"grad_norm": 0.0003569670661818236,
"learning_rate": 1.9986191635928675e-05,
"loss": 0.0801,
"step": 1100
},
{
"epoch": 0.0009922192752830413,
"grad_norm": 34.73747253417969,
"learning_rate": 1.9985328613174218e-05,
"loss": 0.152,
"step": 1150
},
{
"epoch": 0.0010353592437736083,
"grad_norm": 0.5465057492256165,
"learning_rate": 1.9984465590419758e-05,
"loss": 0.082,
"step": 1200
},
{
"epoch": 0.0010784992122641753,
"grad_norm": 0.0005398567882366478,
"learning_rate": 1.99836025676653e-05,
"loss": 0.1238,
"step": 1250
},
{
"epoch": 0.0011216391807547423,
"grad_norm": 28.343412399291992,
"learning_rate": 1.9982739544910845e-05,
"loss": 0.079,
"step": 1300
},
{
"epoch": 0.0011647791492453094,
"grad_norm": 0.18938343226909637,
"learning_rate": 1.9981876522156385e-05,
"loss": 0.0968,
"step": 1350
},
{
"epoch": 0.0012079191177358764,
"grad_norm": 18.69659996032715,
"learning_rate": 1.9981013499401928e-05,
"loss": 0.152,
"step": 1400
},
{
"epoch": 0.0012510590862264434,
"grad_norm": 26.612380981445312,
"learning_rate": 1.9980150476647468e-05,
"loss": 0.0549,
"step": 1450
},
{
"epoch": 0.0012941990547170104,
"grad_norm": 0.0005812590825371444,
"learning_rate": 1.997928745389301e-05,
"loss": 0.0668,
"step": 1500
},
{
"epoch": 0.0013373390232075775,
"grad_norm": 0.15176478028297424,
"learning_rate": 1.997842443113855e-05,
"loss": 0.1209,
"step": 1550
},
{
"epoch": 0.0013804789916981445,
"grad_norm": 32.04401779174805,
"learning_rate": 1.9977561408384094e-05,
"loss": 0.1198,
"step": 1600
},
{
"epoch": 0.0014236189601887115,
"grad_norm": 0.6346271634101868,
"learning_rate": 1.9976698385629638e-05,
"loss": 0.0893,
"step": 1650
},
{
"epoch": 0.0014667589286792785,
"grad_norm": 40.96885681152344,
"learning_rate": 1.997583536287518e-05,
"loss": 0.1393,
"step": 1700
},
{
"epoch": 0.0015098988971698455,
"grad_norm": 0.29022184014320374,
"learning_rate": 1.997497234012072e-05,
"loss": 0.0832,
"step": 1750
},
{
"epoch": 0.0015530388656604126,
"grad_norm": 0.6716536283493042,
"learning_rate": 1.9974109317366264e-05,
"loss": 0.0558,
"step": 1800
},
{
"epoch": 0.0015961788341509796,
"grad_norm": 0.19002307951450348,
"learning_rate": 1.9973246294611804e-05,
"loss": 0.0881,
"step": 1850
},
{
"epoch": 0.0016393188026415466,
"grad_norm": 0.24587740004062653,
"learning_rate": 1.9972383271857348e-05,
"loss": 0.0776,
"step": 1900
},
{
"epoch": 0.0016824587711321136,
"grad_norm": 28.058324813842773,
"learning_rate": 1.9971520249102888e-05,
"loss": 0.0907,
"step": 1950
},
{
"epoch": 0.0017255987396226807,
"grad_norm": 80.17859649658203,
"learning_rate": 1.997065722634843e-05,
"loss": 0.0566,
"step": 2000
},
{
"epoch": 0.0017687387081132477,
"grad_norm": 0.020453251898288727,
"learning_rate": 1.9969794203593974e-05,
"loss": 0.066,
"step": 2050
},
{
"epoch": 0.0018118786766038147,
"grad_norm": 1.5788724340382032e-05,
"learning_rate": 1.9968931180839514e-05,
"loss": 0.1197,
"step": 2100
},
{
"epoch": 0.0018550186450943817,
"grad_norm": 0.008944077417254448,
"learning_rate": 1.9968068158085058e-05,
"loss": 0.0495,
"step": 2150
},
{
"epoch": 0.0018981586135849487,
"grad_norm": 3.117482719972031e-06,
"learning_rate": 1.99672051353306e-05,
"loss": 0.0969,
"step": 2200
},
{
"epoch": 0.0019412985820755155,
"grad_norm": 4.803666114807129,
"learning_rate": 1.996634211257614e-05,
"loss": 0.0534,
"step": 2250
},
{
"epoch": 0.0019844385505660826,
"grad_norm": 2.6723075279733166e-05,
"learning_rate": 1.9965479089821684e-05,
"loss": 0.0841,
"step": 2300
},
{
"epoch": 0.0020275785190566496,
"grad_norm": 0.0005806431290693581,
"learning_rate": 1.9964616067067228e-05,
"loss": 0.1267,
"step": 2350
},
{
"epoch": 0.0020707184875472166,
"grad_norm": 6.426816253224388e-05,
"learning_rate": 1.9963753044312767e-05,
"loss": 0.0404,
"step": 2400
},
{
"epoch": 0.0021138584560377836,
"grad_norm": 5.425294876098633,
"learning_rate": 1.996289002155831e-05,
"loss": 0.1306,
"step": 2450
},
{
"epoch": 0.0021569984245283507,
"grad_norm": 0.5509458780288696,
"learning_rate": 1.996202699880385e-05,
"loss": 0.1097,
"step": 2500
},
{
"epoch": 0.0022001383930189177,
"grad_norm": 1.9030728992674995e-08,
"learning_rate": 1.9961163976049394e-05,
"loss": 0.0954,
"step": 2550
},
{
"epoch": 0.0022432783615094847,
"grad_norm": 5.162133693695068,
"learning_rate": 1.9960300953294934e-05,
"loss": 0.0495,
"step": 2600
},
{
"epoch": 0.0022864183300000517,
"grad_norm": 0.001043809694238007,
"learning_rate": 1.9959437930540477e-05,
"loss": 0.0578,
"step": 2650
},
{
"epoch": 0.0023295582984906187,
"grad_norm": 10.08859634399414,
"learning_rate": 1.995857490778602e-05,
"loss": 0.0479,
"step": 2700
},
{
"epoch": 0.0023726982669811858,
"grad_norm": 0.00013425115321297199,
"learning_rate": 1.9957711885031564e-05,
"loss": 0.0999,
"step": 2750
},
{
"epoch": 0.002415838235471753,
"grad_norm": 0.5551994442939758,
"learning_rate": 1.9956848862277104e-05,
"loss": 0.0828,
"step": 2800
},
{
"epoch": 0.00245897820396232,
"grad_norm": 1.9412257671356201,
"learning_rate": 1.9955985839522647e-05,
"loss": 0.0374,
"step": 2850
},
{
"epoch": 0.002502118172452887,
"grad_norm": 0.2069123089313507,
"learning_rate": 1.9955122816768187e-05,
"loss": 0.0201,
"step": 2900
},
{
"epoch": 0.002545258140943454,
"grad_norm": 1.6855838111951016e-05,
"learning_rate": 1.995425979401373e-05,
"loss": 0.0395,
"step": 2950
},
{
"epoch": 0.002588398109434021,
"grad_norm": 4.1953666141125723e-07,
"learning_rate": 1.9953396771259274e-05,
"loss": 0.081,
"step": 3000
},
{
"epoch": 0.002631538077924588,
"grad_norm": 29.5993709564209,
"learning_rate": 1.9952533748504814e-05,
"loss": 0.0553,
"step": 3050
},
{
"epoch": 0.002674678046415155,
"grad_norm": 3.231801031233772e-07,
"learning_rate": 1.9951670725750357e-05,
"loss": 0.153,
"step": 3100
},
{
"epoch": 0.002717818014905722,
"grad_norm": 2.516810655593872,
"learning_rate": 1.9950807702995897e-05,
"loss": 0.0492,
"step": 3150
},
{
"epoch": 0.002760957983396289,
"grad_norm": 1.1921870708465576,
"learning_rate": 1.994994468024144e-05,
"loss": 0.0816,
"step": 3200
},
{
"epoch": 0.002804097951886856,
"grad_norm": 2.2311925888061523,
"learning_rate": 1.9949081657486984e-05,
"loss": 0.0785,
"step": 3250
},
{
"epoch": 0.002847237920377423,
"grad_norm": 6.03306652919855e-05,
"learning_rate": 1.9948218634732527e-05,
"loss": 0.0706,
"step": 3300
},
{
"epoch": 0.00289037788886799,
"grad_norm": 0.014764097519218922,
"learning_rate": 1.9947355611978067e-05,
"loss": 0.0731,
"step": 3350
},
{
"epoch": 0.002933517857358557,
"grad_norm": 0.007481596898287535,
"learning_rate": 1.994649258922361e-05,
"loss": 0.0535,
"step": 3400
},
{
"epoch": 0.002976657825849124,
"grad_norm": 0.35124772787094116,
"learning_rate": 1.994562956646915e-05,
"loss": 0.056,
"step": 3450
},
{
"epoch": 0.003019797794339691,
"grad_norm": 2.102785583701916e-05,
"learning_rate": 1.9944766543714694e-05,
"loss": 0.0412,
"step": 3500
},
{
"epoch": 0.003062937762830258,
"grad_norm": 11.827704429626465,
"learning_rate": 1.9943903520960234e-05,
"loss": 0.0194,
"step": 3550
},
{
"epoch": 0.003106077731320825,
"grad_norm": 0.0012801631819456816,
"learning_rate": 1.9943040498205777e-05,
"loss": 0.0573,
"step": 3600
},
{
"epoch": 0.003149217699811392,
"grad_norm": 6.006156905158377e-09,
"learning_rate": 1.994217747545132e-05,
"loss": 0.0459,
"step": 3650
},
{
"epoch": 0.003192357668301959,
"grad_norm": 2.7759302412277975e-08,
"learning_rate": 1.994131445269686e-05,
"loss": 0.1287,
"step": 3700
},
{
"epoch": 0.003235497636792526,
"grad_norm": 2.103457186208857e-09,
"learning_rate": 1.9940451429942404e-05,
"loss": 0.0164,
"step": 3750
},
{
"epoch": 0.0032786376052830932,
"grad_norm": 2.2541730981817665e-10,
"learning_rate": 1.9939588407187947e-05,
"loss": 0.0747,
"step": 3800
},
{
"epoch": 0.0033217775737736602,
"grad_norm": 5.149080607225187e-05,
"learning_rate": 1.9938725384433487e-05,
"loss": 0.0055,
"step": 3850
},
{
"epoch": 0.0033649175422642273,
"grad_norm": 1.0809308290481567,
"learning_rate": 1.993786236167903e-05,
"loss": 0.0665,
"step": 3900
},
{
"epoch": 0.0034080575107547943,
"grad_norm": 40.65428924560547,
"learning_rate": 1.9936999338924574e-05,
"loss": 0.0494,
"step": 3950
},
{
"epoch": 0.0034511974792453613,
"grad_norm": 2.0113883018493652,
"learning_rate": 1.9936136316170113e-05,
"loss": 0.0212,
"step": 4000
},
{
"epoch": 0.0034943374477359283,
"grad_norm": 4.40586519241333,
"learning_rate": 1.9935273293415657e-05,
"loss": 0.0988,
"step": 4050
},
{
"epoch": 0.0035374774162264954,
"grad_norm": 1.0736999684013426e-05,
"learning_rate": 1.9934410270661197e-05,
"loss": 0.093,
"step": 4100
},
{
"epoch": 0.0035806173847170624,
"grad_norm": 8.809935820863757e-07,
"learning_rate": 1.993354724790674e-05,
"loss": 0.0384,
"step": 4150
},
{
"epoch": 0.0036237573532076294,
"grad_norm": 2.5714776515960693,
"learning_rate": 1.993268422515228e-05,
"loss": 0.0904,
"step": 4200
},
{
"epoch": 0.0036668973216981964,
"grad_norm": 1.5415873022561755e-09,
"learning_rate": 1.9931821202397823e-05,
"loss": 0.0201,
"step": 4250
},
{
"epoch": 0.0037100372901887634,
"grad_norm": 0.0013566080015152693,
"learning_rate": 1.9930958179643367e-05,
"loss": 0.116,
"step": 4300
},
{
"epoch": 0.0037531772586793305,
"grad_norm": 0.036448314785957336,
"learning_rate": 1.993009515688891e-05,
"loss": 0.0538,
"step": 4350
},
{
"epoch": 0.0037963172271698975,
"grad_norm": 1.335322380065918,
"learning_rate": 1.992923213413445e-05,
"loss": 0.0343,
"step": 4400
},
{
"epoch": 0.0038394571956604645,
"grad_norm": 0.001166568254120648,
"learning_rate": 1.9928369111379993e-05,
"loss": 0.0827,
"step": 4450
},
{
"epoch": 0.003882597164151031,
"grad_norm": 2.504633656030819e-08,
"learning_rate": 1.9927506088625533e-05,
"loss": 0.0871,
"step": 4500
},
{
"epoch": 0.0039257371326415985,
"grad_norm": 1.2820944903069176e-05,
"learning_rate": 1.9926643065871077e-05,
"loss": 0.0427,
"step": 4550
},
{
"epoch": 0.003968877101132165,
"grad_norm": 0.0003728326119016856,
"learning_rate": 1.9925780043116617e-05,
"loss": 0.0448,
"step": 4600
},
{
"epoch": 0.004012017069622733,
"grad_norm": 0.1788995862007141,
"learning_rate": 1.992491702036216e-05,
"loss": 0.0341,
"step": 4650
},
{
"epoch": 0.004055157038113299,
"grad_norm": 1.2131690709793475e-05,
"learning_rate": 1.9924053997607703e-05,
"loss": 0.0456,
"step": 4700
},
{
"epoch": 0.004098297006603867,
"grad_norm": 0.09960448741912842,
"learning_rate": 1.9923190974853243e-05,
"loss": 0.0219,
"step": 4750
},
{
"epoch": 0.004141436975094433,
"grad_norm": 0.00010674689110601321,
"learning_rate": 1.9922327952098786e-05,
"loss": 0.0585,
"step": 4800
},
{
"epoch": 0.004184576943585001,
"grad_norm": 12.699185371398926,
"learning_rate": 1.992146492934433e-05,
"loss": 0.1029,
"step": 4850
},
{
"epoch": 0.004227716912075567,
"grad_norm": 3.298513320260099e-06,
"learning_rate": 1.9920601906589873e-05,
"loss": 0.0596,
"step": 4900
},
{
"epoch": 0.004270856880566135,
"grad_norm": 7.301036021090113e-06,
"learning_rate": 1.9919738883835413e-05,
"loss": 0.0433,
"step": 4950
},
{
"epoch": 0.004313996849056701,
"grad_norm": 1.848353167588357e-05,
"learning_rate": 1.9918875861080956e-05,
"loss": 0.0439,
"step": 5000
},
{
"epoch": 0.004357136817547269,
"grad_norm": 3.848089909297414e-05,
"learning_rate": 1.9918012838326496e-05,
"loss": 0.1067,
"step": 5050
},
{
"epoch": 0.004400276786037835,
"grad_norm": 5.0859394832514226e-05,
"learning_rate": 1.991714981557204e-05,
"loss": 0.0592,
"step": 5100
},
{
"epoch": 0.004443416754528403,
"grad_norm": 3.35551449097693e-05,
"learning_rate": 1.991628679281758e-05,
"loss": 0.1294,
"step": 5150
},
{
"epoch": 0.004486556723018969,
"grad_norm": 0.4632960259914398,
"learning_rate": 1.9915423770063123e-05,
"loss": 0.0896,
"step": 5200
},
{
"epoch": 0.004529696691509537,
"grad_norm": 26.527536392211914,
"learning_rate": 1.9914560747308663e-05,
"loss": 0.0405,
"step": 5250
},
{
"epoch": 0.0045728366600001034,
"grad_norm": 1.0410542017780244e-05,
"learning_rate": 1.9913697724554206e-05,
"loss": 0.0509,
"step": 5300
},
{
"epoch": 0.004615976628490671,
"grad_norm": 29.268795013427734,
"learning_rate": 1.991283470179975e-05,
"loss": 0.0698,
"step": 5350
},
{
"epoch": 0.0046591165969812375,
"grad_norm": 4.8836263886187226e-05,
"learning_rate": 1.9911971679045293e-05,
"loss": 0.0155,
"step": 5400
},
{
"epoch": 0.004702256565471805,
"grad_norm": 0.228873610496521,
"learning_rate": 1.9911108656290833e-05,
"loss": 0.0254,
"step": 5450
},
{
"epoch": 0.0047453965339623715,
"grad_norm": 0.5368197560310364,
"learning_rate": 1.9910245633536376e-05,
"loss": 0.0429,
"step": 5500
},
{
"epoch": 0.004788536502452939,
"grad_norm": 2.2967957193031907e-05,
"learning_rate": 1.9909382610781916e-05,
"loss": 0.048,
"step": 5550
},
{
"epoch": 0.004831676470943506,
"grad_norm": 0.20427367091178894,
"learning_rate": 1.990851958802746e-05,
"loss": 0.0611,
"step": 5600
},
{
"epoch": 0.004874816439434073,
"grad_norm": 9.368510246276855,
"learning_rate": 1.9907656565273003e-05,
"loss": 0.0462,
"step": 5650
},
{
"epoch": 0.00491795640792464,
"grad_norm": 0.08957739174365997,
"learning_rate": 1.9906793542518543e-05,
"loss": 0.063,
"step": 5700
},
{
"epoch": 0.004961096376415207,
"grad_norm": 0.0012034045066684484,
"learning_rate": 1.9905930519764086e-05,
"loss": 0.0335,
"step": 5750
},
{
"epoch": 0.005004236344905774,
"grad_norm": 0.02072218433022499,
"learning_rate": 1.9905067497009626e-05,
"loss": 0.0799,
"step": 5800
},
{
"epoch": 0.005047376313396341,
"grad_norm": 0.008446129970252514,
"learning_rate": 1.990420447425517e-05,
"loss": 0.0395,
"step": 5850
},
{
"epoch": 0.005090516281886908,
"grad_norm": 47.39201736450195,
"learning_rate": 1.9903341451500713e-05,
"loss": 0.0857,
"step": 5900
},
{
"epoch": 0.005133656250377474,
"grad_norm": 4.237736720824614e-05,
"learning_rate": 1.9902478428746256e-05,
"loss": 0.1098,
"step": 5950
},
{
"epoch": 0.005176796218868042,
"grad_norm": 3.733102630576468e-06,
"learning_rate": 1.9901615405991796e-05,
"loss": 0.0516,
"step": 6000
},
{
"epoch": 0.005219936187358608,
"grad_norm": 0.0014495301293209195,
"learning_rate": 1.990075238323734e-05,
"loss": 0.009,
"step": 6050
},
{
"epoch": 0.005263076155849176,
"grad_norm": 1.5238803143802215e-06,
"learning_rate": 1.989988936048288e-05,
"loss": 0.065,
"step": 6100
},
{
"epoch": 0.005306216124339742,
"grad_norm": 3.455934120211168e-06,
"learning_rate": 1.9899026337728423e-05,
"loss": 0.0879,
"step": 6150
},
{
"epoch": 0.00534935609283031,
"grad_norm": 1.4700952988278004e-07,
"learning_rate": 1.9898163314973963e-05,
"loss": 0.047,
"step": 6200
},
{
"epoch": 0.005392496061320876,
"grad_norm": 0.3679034411907196,
"learning_rate": 1.9897300292219506e-05,
"loss": 0.0449,
"step": 6250
},
{
"epoch": 0.005435636029811444,
"grad_norm": 0.8546851873397827,
"learning_rate": 1.989643726946505e-05,
"loss": 0.0829,
"step": 6300
},
{
"epoch": 0.0054787759983020105,
"grad_norm": 0.003740283427760005,
"learning_rate": 1.989557424671059e-05,
"loss": 0.0324,
"step": 6350
},
{
"epoch": 0.005521915966792578,
"grad_norm": 0.11098367720842361,
"learning_rate": 1.9894711223956133e-05,
"loss": 0.0848,
"step": 6400
},
{
"epoch": 0.0055650559352831445,
"grad_norm": 6.6278211363624e-08,
"learning_rate": 1.9893848201201676e-05,
"loss": 0.0153,
"step": 6450
},
{
"epoch": 0.005608195903773712,
"grad_norm": 8.399548079296437e-08,
"learning_rate": 1.9892985178447216e-05,
"loss": 0.0509,
"step": 6500
},
{
"epoch": 0.0056513358722642786,
"grad_norm": 0.010032990016043186,
"learning_rate": 1.989212215569276e-05,
"loss": 0.0894,
"step": 6550
},
{
"epoch": 0.005694475840754846,
"grad_norm": 3.270921524745063e-06,
"learning_rate": 1.9891259132938302e-05,
"loss": 0.0484,
"step": 6600
},
{
"epoch": 0.005737615809245413,
"grad_norm": 4.165988445281982,
"learning_rate": 1.9890396110183842e-05,
"loss": 0.0521,
"step": 6650
},
{
"epoch": 0.00578075577773598,
"grad_norm": 0.16357873380184174,
"learning_rate": 1.9889533087429386e-05,
"loss": 0.0437,
"step": 6700
},
{
"epoch": 0.005823895746226547,
"grad_norm": 1.4861450381431496e-07,
"learning_rate": 1.9888670064674926e-05,
"loss": 0.0291,
"step": 6750
},
{
"epoch": 0.005867035714717114,
"grad_norm": 0.000343196967151016,
"learning_rate": 1.988780704192047e-05,
"loss": 0.0642,
"step": 6800
},
{
"epoch": 0.005910175683207681,
"grad_norm": 1.720488944556564e-05,
"learning_rate": 1.988694401916601e-05,
"loss": 0.0981,
"step": 6850
},
{
"epoch": 0.005953315651698248,
"grad_norm": 0.05200350657105446,
"learning_rate": 1.9886080996411552e-05,
"loss": 0.0184,
"step": 6900
},
{
"epoch": 0.005996455620188815,
"grad_norm": 23.398279190063477,
"learning_rate": 1.9885217973657096e-05,
"loss": 0.049,
"step": 6950
},
{
"epoch": 0.006039595588679382,
"grad_norm": 5.3464435040950775e-05,
"learning_rate": 1.988435495090264e-05,
"loss": 0.0248,
"step": 7000
},
{
"epoch": 0.006082735557169949,
"grad_norm": 0.01494416780769825,
"learning_rate": 1.988349192814818e-05,
"loss": 0.0294,
"step": 7050
},
{
"epoch": 0.006125875525660516,
"grad_norm": 6.322508852463216e-05,
"learning_rate": 1.9882628905393722e-05,
"loss": 0.0075,
"step": 7100
},
{
"epoch": 0.006169015494151083,
"grad_norm": 0.007586951367557049,
"learning_rate": 1.9881765882639262e-05,
"loss": 0.052,
"step": 7150
},
{
"epoch": 0.00621215546264165,
"grad_norm": 2.4987362873263308e-11,
"learning_rate": 1.9880902859884806e-05,
"loss": 0.0137,
"step": 7200
},
{
"epoch": 0.006255295431132217,
"grad_norm": 8.16138744354248,
"learning_rate": 1.9880039837130345e-05,
"loss": 0.109,
"step": 7250
},
{
"epoch": 0.006298435399622784,
"grad_norm": 0.002273560268804431,
"learning_rate": 1.987917681437589e-05,
"loss": 0.0204,
"step": 7300
},
{
"epoch": 0.006341575368113351,
"grad_norm": 0.00022486828675027937,
"learning_rate": 1.9878313791621432e-05,
"loss": 0.0764,
"step": 7350
},
{
"epoch": 0.006384715336603918,
"grad_norm": 0.00014589431521017104,
"learning_rate": 1.9877450768866972e-05,
"loss": 0.0407,
"step": 7400
},
{
"epoch": 0.006427855305094485,
"grad_norm": 0.0005719369510188699,
"learning_rate": 1.9876587746112515e-05,
"loss": 0.0648,
"step": 7450
},
{
"epoch": 0.006470995273585052,
"grad_norm": 3.020178610313451e-06,
"learning_rate": 1.987572472335806e-05,
"loss": 0.0525,
"step": 7500
},
{
"epoch": 0.006514135242075619,
"grad_norm": 4.380962934646959e-07,
"learning_rate": 1.9874861700603602e-05,
"loss": 0.0392,
"step": 7550
},
{
"epoch": 0.0065572752105661864,
"grad_norm": 1.5524530681432225e-05,
"learning_rate": 1.9873998677849142e-05,
"loss": 0.047,
"step": 7600
},
{
"epoch": 0.006600415179056753,
"grad_norm": 1.0878498869715258e-05,
"learning_rate": 1.9873135655094685e-05,
"loss": 0.0453,
"step": 7650
},
{
"epoch": 0.0066435551475473205,
"grad_norm": 10.473357200622559,
"learning_rate": 1.9872272632340225e-05,
"loss": 0.0621,
"step": 7700
},
{
"epoch": 0.006686695116037887,
"grad_norm": 0.05818796157836914,
"learning_rate": 1.987140960958577e-05,
"loss": 0.0403,
"step": 7750
},
{
"epoch": 0.0067298350845284545,
"grad_norm": 3.924364833096661e-09,
"learning_rate": 1.987054658683131e-05,
"loss": 0.0317,
"step": 7800
},
{
"epoch": 0.006772975053019021,
"grad_norm": 3.545212848621304e-06,
"learning_rate": 1.9869683564076852e-05,
"loss": 0.0633,
"step": 7850
},
{
"epoch": 0.006816115021509589,
"grad_norm": 0.17004750669002533,
"learning_rate": 1.9868820541322392e-05,
"loss": 0.0147,
"step": 7900
},
{
"epoch": 0.006859254990000155,
"grad_norm": 5.974680243525654e-05,
"learning_rate": 1.9867957518567935e-05,
"loss": 0.0579,
"step": 7950
},
{
"epoch": 0.006902394958490723,
"grad_norm": 3.9863412126806e-08,
"learning_rate": 1.986709449581348e-05,
"loss": 0.0248,
"step": 8000
},
{
"epoch": 0.006945534926981289,
"grad_norm": 0.8195998668670654,
"learning_rate": 1.9866231473059022e-05,
"loss": 0.1239,
"step": 8050
},
{
"epoch": 0.006988674895471857,
"grad_norm": 0.0003940521564800292,
"learning_rate": 1.9865368450304562e-05,
"loss": 0.0727,
"step": 8100
},
{
"epoch": 0.007031814863962423,
"grad_norm": 0.0001462361979065463,
"learning_rate": 1.9864505427550105e-05,
"loss": 0.0264,
"step": 8150
},
{
"epoch": 0.007074954832452991,
"grad_norm": 4.075237214351546e-08,
"learning_rate": 1.9863642404795645e-05,
"loss": 0.0154,
"step": 8200
},
{
"epoch": 0.007118094800943557,
"grad_norm": 5.172235432837624e-06,
"learning_rate": 1.986277938204119e-05,
"loss": 0.0256,
"step": 8250
},
{
"epoch": 0.007161234769434125,
"grad_norm": 0.0007250295020639896,
"learning_rate": 1.9861916359286732e-05,
"loss": 0.0325,
"step": 8300
},
{
"epoch": 0.007204374737924691,
"grad_norm": 8.068302154541016,
"learning_rate": 1.9861053336532272e-05,
"loss": 0.032,
"step": 8350
},
{
"epoch": 0.007247514706415259,
"grad_norm": 11.65196704864502,
"learning_rate": 1.9860190313777815e-05,
"loss": 0.006,
"step": 8400
},
{
"epoch": 0.007290654674905825,
"grad_norm": 1.6602513808194885e-09,
"learning_rate": 1.9859327291023355e-05,
"loss": 0.0565,
"step": 8450
},
{
"epoch": 0.007333794643396393,
"grad_norm": 0.22325988113880157,
"learning_rate": 1.98584642682689e-05,
"loss": 0.0493,
"step": 8500
},
{
"epoch": 0.007376934611886959,
"grad_norm": 0.0023358704056590796,
"learning_rate": 1.985760124551444e-05,
"loss": 0.0061,
"step": 8550
},
{
"epoch": 0.007420074580377527,
"grad_norm": 1.97016873926259e-07,
"learning_rate": 1.9856738222759985e-05,
"loss": 0.0704,
"step": 8600
},
{
"epoch": 0.0074632145488680935,
"grad_norm": 0.0003019660944119096,
"learning_rate": 1.9855875200005525e-05,
"loss": 0.0156,
"step": 8650
},
{
"epoch": 0.007506354517358661,
"grad_norm": 0.014269077219069004,
"learning_rate": 1.9855012177251068e-05,
"loss": 0.0606,
"step": 8700
},
{
"epoch": 0.0075494944858492275,
"grad_norm": 0.010774667374789715,
"learning_rate": 1.9854149154496608e-05,
"loss": 0.0129,
"step": 8750
},
{
"epoch": 0.007592634454339795,
"grad_norm": 14.643841743469238,
"learning_rate": 1.985328613174215e-05,
"loss": 0.0368,
"step": 8800
},
{
"epoch": 0.0076357744228303616,
"grad_norm": 0.004390218295156956,
"learning_rate": 1.985242310898769e-05,
"loss": 0.0193,
"step": 8850
},
{
"epoch": 0.007678914391320929,
"grad_norm": 0.00026494322810322046,
"learning_rate": 1.9851560086233235e-05,
"loss": 0.0377,
"step": 8900
},
{
"epoch": 0.007722054359811496,
"grad_norm": 0.3454723656177521,
"learning_rate": 1.9850697063478778e-05,
"loss": 0.0271,
"step": 8950
},
{
"epoch": 0.007765194328302062,
"grad_norm": 1.240284319692364e-07,
"learning_rate": 1.9849834040724318e-05,
"loss": 0.0312,
"step": 9000
},
{
"epoch": 0.00780833429679263,
"grad_norm": 0.0001445577945560217,
"learning_rate": 1.984897101796986e-05,
"loss": 0.0305,
"step": 9050
},
{
"epoch": 0.007851474265283197,
"grad_norm": 4.175523482530252e-09,
"learning_rate": 1.9848107995215405e-05,
"loss": 0.0979,
"step": 9100
},
{
"epoch": 0.007894614233773764,
"grad_norm": 0.035435471683740616,
"learning_rate": 1.9847244972460945e-05,
"loss": 0.0098,
"step": 9150
},
{
"epoch": 0.00793775420226433,
"grad_norm": 26.931116104125977,
"learning_rate": 1.9846381949706488e-05,
"loss": 0.0262,
"step": 9200
},
{
"epoch": 0.007980894170754897,
"grad_norm": 8.122495273710229e-06,
"learning_rate": 1.984551892695203e-05,
"loss": 0.0602,
"step": 9250
},
{
"epoch": 0.008024034139245465,
"grad_norm": 2.0076650411593455e-11,
"learning_rate": 1.984465590419757e-05,
"loss": 0.0514,
"step": 9300
},
{
"epoch": 0.008067174107736032,
"grad_norm": 2.9286837843756075e-08,
"learning_rate": 1.9843792881443115e-05,
"loss": 0.0657,
"step": 9350
},
{
"epoch": 0.008110314076226598,
"grad_norm": 1.5581694841384888,
"learning_rate": 1.9842929858688655e-05,
"loss": 0.0335,
"step": 9400
},
{
"epoch": 0.008153454044717165,
"grad_norm": 3.392365144350151e-08,
"learning_rate": 1.9842066835934198e-05,
"loss": 0.0701,
"step": 9450
},
{
"epoch": 0.008196594013207733,
"grad_norm": 0.03891870751976967,
"learning_rate": 1.9841203813179738e-05,
"loss": 0.1115,
"step": 9500
},
{
"epoch": 0.0082397339816983,
"grad_norm": 5.497531890869141,
"learning_rate": 1.984034079042528e-05,
"loss": 0.0065,
"step": 9550
},
{
"epoch": 0.008282873950188866,
"grad_norm": 0.0006867619813419878,
"learning_rate": 1.9839477767670825e-05,
"loss": 0.0231,
"step": 9600
},
{
"epoch": 0.008326013918679433,
"grad_norm": 0.000866669462993741,
"learning_rate": 1.9838614744916368e-05,
"loss": 0.0234,
"step": 9650
},
{
"epoch": 0.008369153887170001,
"grad_norm": 0.061681024730205536,
"learning_rate": 1.9837751722161908e-05,
"loss": 0.0371,
"step": 9700
},
{
"epoch": 0.008412293855660568,
"grad_norm": 7.284898515536042e-07,
"learning_rate": 1.983688869940745e-05,
"loss": 0.039,
"step": 9750
},
{
"epoch": 0.008455433824151135,
"grad_norm": 5.737701980201848e-10,
"learning_rate": 1.983602567665299e-05,
"loss": 0.0145,
"step": 9800
},
{
"epoch": 0.008498573792641701,
"grad_norm": 6.553115099450224e-07,
"learning_rate": 1.9835162653898534e-05,
"loss": 0.024,
"step": 9850
},
{
"epoch": 0.00854171376113227,
"grad_norm": 7.23102075994575e-08,
"learning_rate": 1.9834299631144074e-05,
"loss": 0.0458,
"step": 9900
},
{
"epoch": 0.008584853729622836,
"grad_norm": 5.95320443608216e-06,
"learning_rate": 1.9833436608389618e-05,
"loss": 0.0918,
"step": 9950
},
{
"epoch": 0.008627993698113403,
"grad_norm": 7.1469521571998484e-06,
"learning_rate": 1.983257358563516e-05,
"loss": 0.0287,
"step": 10000
},
{
"epoch": 0.00867113366660397,
"grad_norm": 0.00036231454578228295,
"learning_rate": 1.98317105628807e-05,
"loss": 0.0284,
"step": 10050
},
{
"epoch": 0.008714273635094538,
"grad_norm": 8.159648132277653e-05,
"learning_rate": 1.9830847540126244e-05,
"loss": 0.0589,
"step": 10100
},
{
"epoch": 0.008757413603585104,
"grad_norm": 0.0002320503263035789,
"learning_rate": 1.9829984517371788e-05,
"loss": 0.0296,
"step": 10150
},
{
"epoch": 0.00880055357207567,
"grad_norm": 0.001181815518066287,
"learning_rate": 1.982912149461733e-05,
"loss": 0.0244,
"step": 10200
},
{
"epoch": 0.008843693540566237,
"grad_norm": 2.497093198883249e-09,
"learning_rate": 1.982825847186287e-05,
"loss": 0.0337,
"step": 10250
},
{
"epoch": 0.008886833509056806,
"grad_norm": 0.00030890764901414514,
"learning_rate": 1.9827395449108414e-05,
"loss": 0.033,
"step": 10300
},
{
"epoch": 0.008929973477547372,
"grad_norm": 24.577367782592773,
"learning_rate": 1.9826532426353954e-05,
"loss": 0.0135,
"step": 10350
},
{
"epoch": 0.008973113446037939,
"grad_norm": 1.9483505487442017,
"learning_rate": 1.9825669403599498e-05,
"loss": 0.0299,
"step": 10400
},
{
"epoch": 0.009016253414528505,
"grad_norm": 0.0004972516908310354,
"learning_rate": 1.9824806380845038e-05,
"loss": 0.0107,
"step": 10450
},
{
"epoch": 0.009059393383019074,
"grad_norm": 1.4932817649082608e-08,
"learning_rate": 1.982394335809058e-05,
"loss": 0.0567,
"step": 10500
},
{
"epoch": 0.00910253335150964,
"grad_norm": 0.004500082693994045,
"learning_rate": 1.982308033533612e-05,
"loss": 0.0163,
"step": 10550
},
{
"epoch": 0.009145673320000207,
"grad_norm": 6.5830713538161945e-06,
"learning_rate": 1.9822217312581664e-05,
"loss": 0.0359,
"step": 10600
},
{
"epoch": 0.009188813288490773,
"grad_norm": 0.09253023564815521,
"learning_rate": 1.9821354289827207e-05,
"loss": 0.0179,
"step": 10650
},
{
"epoch": 0.009231953256981342,
"grad_norm": 0.004253961145877838,
"learning_rate": 1.982049126707275e-05,
"loss": 0.0433,
"step": 10700
},
{
"epoch": 0.009275093225471908,
"grad_norm": 0.0014189484063535929,
"learning_rate": 1.981962824431829e-05,
"loss": 0.0467,
"step": 10750
},
{
"epoch": 0.009318233193962475,
"grad_norm": 0.0005026152357459068,
"learning_rate": 1.9818765221563834e-05,
"loss": 0.0137,
"step": 10800
},
{
"epoch": 0.009361373162453042,
"grad_norm": 0.003253827104344964,
"learning_rate": 1.9817902198809374e-05,
"loss": 0.0135,
"step": 10850
},
{
"epoch": 0.00940451313094361,
"grad_norm": 0.5753559470176697,
"learning_rate": 1.9817039176054917e-05,
"loss": 0.0707,
"step": 10900
},
{
"epoch": 0.009447653099434176,
"grad_norm": 2.7666785626934143e-06,
"learning_rate": 1.981617615330046e-05,
"loss": 0.0042,
"step": 10950
},
{
"epoch": 0.009490793067924743,
"grad_norm": 0.0010713053634390235,
"learning_rate": 1.9815313130546e-05,
"loss": 0.0248,
"step": 11000
},
{
"epoch": 0.00953393303641531,
"grad_norm": 0.00012337288353592157,
"learning_rate": 1.9814450107791544e-05,
"loss": 0.0101,
"step": 11050
},
{
"epoch": 0.009577073004905878,
"grad_norm": 2.0991153704130738e-08,
"learning_rate": 1.9813587085037084e-05,
"loss": 0.0319,
"step": 11100
},
{
"epoch": 0.009620212973396445,
"grad_norm": 0.0001735202531563118,
"learning_rate": 1.9812724062282627e-05,
"loss": 0.0065,
"step": 11150
},
{
"epoch": 0.009663352941887011,
"grad_norm": 0.0007401935290545225,
"learning_rate": 1.981186103952817e-05,
"loss": 0.0184,
"step": 11200
},
{
"epoch": 0.009706492910377578,
"grad_norm": 5.382436825129844e-07,
"learning_rate": 1.9810998016773714e-05,
"loss": 0.0766,
"step": 11250
},
{
"epoch": 0.009749632878868146,
"grad_norm": 5.5672944654361345e-06,
"learning_rate": 1.9810134994019254e-05,
"loss": 0.0082,
"step": 11300
},
{
"epoch": 0.009792772847358713,
"grad_norm": 2.2267850852131232e-07,
"learning_rate": 1.9809271971264797e-05,
"loss": 0.019,
"step": 11350
},
{
"epoch": 0.00983591281584928,
"grad_norm": 0.23477919399738312,
"learning_rate": 1.9808408948510337e-05,
"loss": 0.0295,
"step": 11400
},
{
"epoch": 0.009879052784339846,
"grad_norm": 9.228908304237393e-09,
"learning_rate": 1.980754592575588e-05,
"loss": 0.0575,
"step": 11450
},
{
"epoch": 0.009922192752830414,
"grad_norm": 0.00020697819127235562,
"learning_rate": 1.980668290300142e-05,
"loss": 0.0269,
"step": 11500
},
{
"epoch": 0.00996533272132098,
"grad_norm": 0.19181561470031738,
"learning_rate": 1.9805819880246964e-05,
"loss": 0.0093,
"step": 11550
},
{
"epoch": 0.010008472689811547,
"grad_norm": 3.362165080034174e-05,
"learning_rate": 1.9804956857492507e-05,
"loss": 0.0373,
"step": 11600
},
{
"epoch": 0.010051612658302114,
"grad_norm": 0.3552068769931793,
"learning_rate": 1.9804093834738047e-05,
"loss": 0.0599,
"step": 11650
},
{
"epoch": 0.010094752626792682,
"grad_norm": 1.6512422007508576e-05,
"learning_rate": 1.980323081198359e-05,
"loss": 0.0255,
"step": 11700
},
{
"epoch": 0.010137892595283249,
"grad_norm": 6.555333614349365,
"learning_rate": 1.9802367789229134e-05,
"loss": 0.0162,
"step": 11750
},
{
"epoch": 0.010181032563773815,
"grad_norm": 2.48828387260437,
"learning_rate": 1.9801504766474674e-05,
"loss": 0.0377,
"step": 11800
},
{
"epoch": 0.010224172532264382,
"grad_norm": 0.005198315717279911,
"learning_rate": 1.9800641743720217e-05,
"loss": 0.0071,
"step": 11850
},
{
"epoch": 0.010267312500754949,
"grad_norm": 0.0014223635662347078,
"learning_rate": 1.979977872096576e-05,
"loss": 0.0286,
"step": 11900
},
{
"epoch": 0.010310452469245517,
"grad_norm": 1.555037556499883e-06,
"learning_rate": 1.97989156982113e-05,
"loss": 0.0429,
"step": 11950
},
{
"epoch": 0.010353592437736083,
"grad_norm": 7.9471330642700195,
"learning_rate": 1.9798052675456844e-05,
"loss": 0.0401,
"step": 12000
},
{
"epoch": 0.01039673240622665,
"grad_norm": 0.0001056401088135317,
"learning_rate": 1.9797189652702384e-05,
"loss": 0.008,
"step": 12050
},
{
"epoch": 0.010439872374717217,
"grad_norm": 3.85151979571674e-05,
"learning_rate": 1.9796326629947927e-05,
"loss": 0.0309,
"step": 12100
},
{
"epoch": 0.010483012343207785,
"grad_norm": 16.898605346679688,
"learning_rate": 1.9795463607193467e-05,
"loss": 0.0234,
"step": 12150
},
{
"epoch": 0.010526152311698352,
"grad_norm": 3.0313758170308347e-09,
"learning_rate": 1.979460058443901e-05,
"loss": 0.0237,
"step": 12200
},
{
"epoch": 0.010569292280188918,
"grad_norm": 0.0001202192361233756,
"learning_rate": 1.9793737561684553e-05,
"loss": 0.026,
"step": 12250
},
{
"epoch": 0.010612432248679485,
"grad_norm": 1.240387376144625e-10,
"learning_rate": 1.9792874538930097e-05,
"loss": 0.0527,
"step": 12300
},
{
"epoch": 0.010655572217170053,
"grad_norm": 11.890090942382812,
"learning_rate": 1.9792011516175637e-05,
"loss": 0.0577,
"step": 12350
},
{
"epoch": 0.01069871218566062,
"grad_norm": 2.300609958183486e-05,
"learning_rate": 1.979114849342118e-05,
"loss": 0.0413,
"step": 12400
},
{
"epoch": 0.010741852154151186,
"grad_norm": 3.607681719586253e-05,
"learning_rate": 1.979028547066672e-05,
"loss": 0.056,
"step": 12450
},
{
"epoch": 0.010784992122641753,
"grad_norm": 0.007184322457760572,
"learning_rate": 1.9789422447912263e-05,
"loss": 0.0356,
"step": 12500
},
{
"epoch": 0.010828132091132321,
"grad_norm": 0.03649460896849632,
"learning_rate": 1.9788559425157807e-05,
"loss": 0.0653,
"step": 12550
},
{
"epoch": 0.010871272059622888,
"grad_norm": 2.2537233235198073e-05,
"learning_rate": 1.9787696402403347e-05,
"loss": 0.0298,
"step": 12600
},
{
"epoch": 0.010914412028113454,
"grad_norm": 0.012440712191164494,
"learning_rate": 1.978683337964889e-05,
"loss": 0.0027,
"step": 12650
},
{
"epoch": 0.010957551996604021,
"grad_norm": 0.0001454145967727527,
"learning_rate": 1.978597035689443e-05,
"loss": 0.0428,
"step": 12700
},
{
"epoch": 0.01100069196509459,
"grad_norm": 4.73512305754209e-11,
"learning_rate": 1.9785107334139973e-05,
"loss": 0.0266,
"step": 12750
},
{
"epoch": 0.011043831933585156,
"grad_norm": 0.4899098873138428,
"learning_rate": 1.9784244311385517e-05,
"loss": 0.0423,
"step": 12800
},
{
"epoch": 0.011086971902075722,
"grad_norm": 3.1542436772724614e-05,
"learning_rate": 1.978338128863106e-05,
"loss": 0.0201,
"step": 12850
},
{
"epoch": 0.011130111870566289,
"grad_norm": 2.1234811242720752e-08,
"learning_rate": 1.97825182658766e-05,
"loss": 0.0602,
"step": 12900
},
{
"epoch": 0.011173251839056857,
"grad_norm": 2.4936113174334196e-09,
"learning_rate": 1.9781655243122143e-05,
"loss": 0.0289,
"step": 12950
},
{
"epoch": 0.011216391807547424,
"grad_norm": 7.835155884095002e-07,
"learning_rate": 1.9780792220367683e-05,
"loss": 0.0126,
"step": 13000
},
{
"epoch": 0.01125953177603799,
"grad_norm": 3.1845395369600737e-06,
"learning_rate": 1.9779929197613227e-05,
"loss": 0.0133,
"step": 13050
},
{
"epoch": 0.011302671744528557,
"grad_norm": 6.416823072896705e-09,
"learning_rate": 1.9779066174858766e-05,
"loss": 0.0413,
"step": 13100
},
{
"epoch": 0.011345811713019125,
"grad_norm": 6.80740213394165,
"learning_rate": 1.977820315210431e-05,
"loss": 0.0443,
"step": 13150
},
{
"epoch": 0.011388951681509692,
"grad_norm": 0.012771312147378922,
"learning_rate": 1.977734012934985e-05,
"loss": 0.0383,
"step": 13200
},
{
"epoch": 0.011432091650000259,
"grad_norm": 0.0008403750252909958,
"learning_rate": 1.9776477106595393e-05,
"loss": 0.0434,
"step": 13250
},
{
"epoch": 0.011475231618490825,
"grad_norm": 1.2084444761276245,
"learning_rate": 1.9775614083840936e-05,
"loss": 0.0066,
"step": 13300
},
{
"epoch": 0.011518371586981394,
"grad_norm": 9.330961781017777e-09,
"learning_rate": 1.977475106108648e-05,
"loss": 0.0325,
"step": 13350
},
{
"epoch": 0.01156151155547196,
"grad_norm": 0.00011164277384523302,
"learning_rate": 1.977388803833202e-05,
"loss": 0.0956,
"step": 13400
},
{
"epoch": 0.011604651523962527,
"grad_norm": 2.7169560326001374e-06,
"learning_rate": 1.9773025015577563e-05,
"loss": 0.0207,
"step": 13450
},
{
"epoch": 0.011647791492453093,
"grad_norm": 0.0006356360972858965,
"learning_rate": 1.9772161992823103e-05,
"loss": 0.0045,
"step": 13500
},
{
"epoch": 0.011690931460943662,
"grad_norm": 6.926347850821912e-05,
"learning_rate": 1.9771298970068646e-05,
"loss": 0.0176,
"step": 13550
},
{
"epoch": 0.011734071429434228,
"grad_norm": 0.00017402067896910012,
"learning_rate": 1.977043594731419e-05,
"loss": 0.0284,
"step": 13600
},
{
"epoch": 0.011777211397924795,
"grad_norm": 1.069779334561538e-10,
"learning_rate": 1.976957292455973e-05,
"loss": 0.0292,
"step": 13650
},
{
"epoch": 0.011820351366415361,
"grad_norm": 0.17523643374443054,
"learning_rate": 1.9768709901805273e-05,
"loss": 0.0273,
"step": 13700
},
{
"epoch": 0.01186349133490593,
"grad_norm": 9.821783065795898,
"learning_rate": 1.9767846879050813e-05,
"loss": 0.0706,
"step": 13750
},
{
"epoch": 0.011906631303396496,
"grad_norm": 1.8948287561215693e-07,
"learning_rate": 1.9766983856296356e-05,
"loss": 0.0165,
"step": 13800
},
{
"epoch": 0.011949771271887063,
"grad_norm": 5.998489086778136e-06,
"learning_rate": 1.97661208335419e-05,
"loss": 0.0372,
"step": 13850
},
{
"epoch": 0.01199291124037763,
"grad_norm": 1.5009301900863647,
"learning_rate": 1.9765257810787443e-05,
"loss": 0.0387,
"step": 13900
},
{
"epoch": 0.012036051208868198,
"grad_norm": 5.223755650263229e-09,
"learning_rate": 1.9764394788032983e-05,
"loss": 0.0828,
"step": 13950
},
{
"epoch": 0.012079191177358764,
"grad_norm": 0.1192856878042221,
"learning_rate": 1.9763531765278526e-05,
"loss": 0.0286,
"step": 14000
},
{
"epoch": 0.012122331145849331,
"grad_norm": 5.815771601191955e-06,
"learning_rate": 1.9762668742524066e-05,
"loss": 0.0541,
"step": 14050
},
{
"epoch": 0.012165471114339898,
"grad_norm": 11.029925346374512,
"learning_rate": 1.976180571976961e-05,
"loss": 0.039,
"step": 14100
},
{
"epoch": 0.012208611082830466,
"grad_norm": 0.00015492299280595034,
"learning_rate": 1.976094269701515e-05,
"loss": 0.0354,
"step": 14150
},
{
"epoch": 0.012251751051321032,
"grad_norm": 4.5061292439640965e-06,
"learning_rate": 1.9760079674260693e-05,
"loss": 0.0364,
"step": 14200
},
{
"epoch": 0.012294891019811599,
"grad_norm": 0.45702916383743286,
"learning_rate": 1.9759216651506236e-05,
"loss": 0.0313,
"step": 14250
},
{
"epoch": 0.012338030988302166,
"grad_norm": 1.0066764311034149e-08,
"learning_rate": 1.9758353628751776e-05,
"loss": 0.0344,
"step": 14300
},
{
"epoch": 0.012381170956792734,
"grad_norm": 7.227523610708886e-07,
"learning_rate": 1.975749060599732e-05,
"loss": 0.0351,
"step": 14350
},
{
"epoch": 0.0124243109252833,
"grad_norm": 5.080125653478262e-09,
"learning_rate": 1.9756627583242863e-05,
"loss": 0.0136,
"step": 14400
},
{
"epoch": 0.012467450893773867,
"grad_norm": 0.016180645674467087,
"learning_rate": 1.9755764560488403e-05,
"loss": 0.0407,
"step": 14450
},
{
"epoch": 0.012510590862264434,
"grad_norm": 0.061310265213251114,
"learning_rate": 1.9754901537733946e-05,
"loss": 0.01,
"step": 14500
},
{
"epoch": 0.012553730830755002,
"grad_norm": 7.248584552144166e-06,
"learning_rate": 1.975403851497949e-05,
"loss": 0.0178,
"step": 14550
},
{
"epoch": 0.012596870799245569,
"grad_norm": 7.203379154205322,
"learning_rate": 1.975317549222503e-05,
"loss": 0.062,
"step": 14600
},
{
"epoch": 0.012640010767736135,
"grad_norm": 1.126842835219577e-05,
"learning_rate": 1.9752312469470573e-05,
"loss": 0.0173,
"step": 14650
},
{
"epoch": 0.012683150736226702,
"grad_norm": 0.0011432298924773932,
"learning_rate": 1.9751449446716112e-05,
"loss": 0.0923,
"step": 14700
},
{
"epoch": 0.01272629070471727,
"grad_norm": 1.9043671954932506e-06,
"learning_rate": 1.9750586423961656e-05,
"loss": 0.047,
"step": 14750
},
{
"epoch": 0.012769430673207837,
"grad_norm": 0.20942749083042145,
"learning_rate": 1.9749723401207196e-05,
"loss": 0.0156,
"step": 14800
},
{
"epoch": 0.012812570641698403,
"grad_norm": 1.760947014872727e-07,
"learning_rate": 1.974886037845274e-05,
"loss": 0.0481,
"step": 14850
},
{
"epoch": 0.01285571061018897,
"grad_norm": 2.280950639033108e-06,
"learning_rate": 1.9747997355698282e-05,
"loss": 0.0073,
"step": 14900
},
{
"epoch": 0.012898850578679536,
"grad_norm": 0.019771773368120193,
"learning_rate": 1.9747134332943826e-05,
"loss": 0.0109,
"step": 14950
},
{
"epoch": 0.012941990547170105,
"grad_norm": 0.7483711838722229,
"learning_rate": 1.9746271310189366e-05,
"loss": 0.0025,
"step": 15000
},
{
"epoch": 0.012985130515660671,
"grad_norm": 0.0011623813770711422,
"learning_rate": 1.974540828743491e-05,
"loss": 0.0097,
"step": 15050
},
{
"epoch": 0.013028270484151238,
"grad_norm": 0.00023206142941489816,
"learning_rate": 1.974454526468045e-05,
"loss": 0.0211,
"step": 15100
},
{
"epoch": 0.013071410452641805,
"grad_norm": 8.044224841796677e-07,
"learning_rate": 1.9743682241925992e-05,
"loss": 0.0678,
"step": 15150
},
{
"epoch": 0.013114550421132373,
"grad_norm": 2.867023241037714e-08,
"learning_rate": 1.9742819219171536e-05,
"loss": 0.0332,
"step": 15200
},
{
"epoch": 0.01315769038962294,
"grad_norm": 3.529981640326696e-08,
"learning_rate": 1.9741956196417076e-05,
"loss": 0.0811,
"step": 15250
},
{
"epoch": 0.013200830358113506,
"grad_norm": 3.617996844695881e-05,
"learning_rate": 1.974109317366262e-05,
"loss": 0.0281,
"step": 15300
},
{
"epoch": 0.013243970326604073,
"grad_norm": 0.0002957701508421451,
"learning_rate": 1.974023015090816e-05,
"loss": 0.0005,
"step": 15350
},
{
"epoch": 0.013287110295094641,
"grad_norm": 0.1449277251958847,
"learning_rate": 1.9739367128153702e-05,
"loss": 0.026,
"step": 15400
},
{
"epoch": 0.013330250263585208,
"grad_norm": 3.980770713063464e-10,
"learning_rate": 1.9738504105399246e-05,
"loss": 0.0121,
"step": 15450
},
{
"epoch": 0.013373390232075774,
"grad_norm": 6.5806302629312086e-09,
"learning_rate": 1.973764108264479e-05,
"loss": 0.0131,
"step": 15500
},
{
"epoch": 0.01341653020056634,
"grad_norm": 10.989927291870117,
"learning_rate": 1.973677805989033e-05,
"loss": 0.0375,
"step": 15550
},
{
"epoch": 0.013459670169056909,
"grad_norm": 0.028256021440029144,
"learning_rate": 1.9735915037135872e-05,
"loss": 0.005,
"step": 15600
},
{
"epoch": 0.013502810137547476,
"grad_norm": 3.129288234049454e-05,
"learning_rate": 1.9735052014381412e-05,
"loss": 0.0145,
"step": 15650
},
{
"epoch": 0.013545950106038042,
"grad_norm": 1.9001586970546214e-09,
"learning_rate": 1.9734188991626955e-05,
"loss": 0.0662,
"step": 15700
},
{
"epoch": 0.013589090074528609,
"grad_norm": 2.575715734565165e-05,
"learning_rate": 1.9733325968872495e-05,
"loss": 0.0169,
"step": 15750
},
{
"epoch": 0.013632230043019177,
"grad_norm": 0.00017570947238709778,
"learning_rate": 1.973246294611804e-05,
"loss": 0.094,
"step": 15800
},
{
"epoch": 0.013675370011509744,
"grad_norm": 2.5118701563187074e-10,
"learning_rate": 1.973159992336358e-05,
"loss": 0.0255,
"step": 15850
},
{
"epoch": 0.01371850998000031,
"grad_norm": 4.180213952764689e-09,
"learning_rate": 1.9730736900609122e-05,
"loss": 0.0447,
"step": 15900
},
{
"epoch": 0.013761649948490877,
"grad_norm": 9.289252744792975e-08,
"learning_rate": 1.9729873877854665e-05,
"loss": 0.0102,
"step": 15950
},
{
"epoch": 0.013804789916981445,
"grad_norm": 9.842972659157567e-09,
"learning_rate": 1.972901085510021e-05,
"loss": 0.0263,
"step": 16000
},
{
"epoch": 0.013847929885472012,
"grad_norm": 5.562032222747803,
"learning_rate": 1.972814783234575e-05,
"loss": 0.0495,
"step": 16050
},
{
"epoch": 0.013891069853962578,
"grad_norm": 0.07973814755678177,
"learning_rate": 1.9727284809591292e-05,
"loss": 0.0315,
"step": 16100
},
{
"epoch": 0.013934209822453145,
"grad_norm": 4.2199195604553097e-07,
"learning_rate": 1.9726421786836832e-05,
"loss": 0.0532,
"step": 16150
},
{
"epoch": 0.013977349790943713,
"grad_norm": 0.00012909203360322863,
"learning_rate": 1.9725558764082375e-05,
"loss": 0.0331,
"step": 16200
},
{
"epoch": 0.01402048975943428,
"grad_norm": 0.023724447935819626,
"learning_rate": 1.972469574132792e-05,
"loss": 0.0243,
"step": 16250
},
{
"epoch": 0.014063629727924847,
"grad_norm": 7.801064384693746e-06,
"learning_rate": 1.972383271857346e-05,
"loss": 0.0729,
"step": 16300
},
{
"epoch": 0.014106769696415413,
"grad_norm": 0.00012842965952586383,
"learning_rate": 1.9722969695819002e-05,
"loss": 0.0076,
"step": 16350
},
{
"epoch": 0.014149909664905981,
"grad_norm": 3.283237148821172e-08,
"learning_rate": 1.9722106673064542e-05,
"loss": 0.0496,
"step": 16400
},
{
"epoch": 0.014193049633396548,
"grad_norm": 5.932063174007851e-10,
"learning_rate": 1.9721243650310085e-05,
"loss": 0.0793,
"step": 16450
},
{
"epoch": 0.014236189601887115,
"grad_norm": 0.07802320271730423,
"learning_rate": 1.972038062755563e-05,
"loss": 0.0157,
"step": 16500
},
{
"epoch": 0.014279329570377681,
"grad_norm": 1.4036957907137548e-07,
"learning_rate": 1.9719517604801172e-05,
"loss": 0.0221,
"step": 16550
},
{
"epoch": 0.01432246953886825,
"grad_norm": 3.236153389707397e-08,
"learning_rate": 1.9718654582046712e-05,
"loss": 0.0001,
"step": 16600
},
{
"epoch": 0.014365609507358816,
"grad_norm": 3.180664539337158,
"learning_rate": 1.9717791559292255e-05,
"loss": 0.0684,
"step": 16650
},
{
"epoch": 0.014408749475849383,
"grad_norm": 6.371417839545757e-06,
"learning_rate": 1.9716928536537795e-05,
"loss": 0.006,
"step": 16700
},
{
"epoch": 0.01445188944433995,
"grad_norm": 6.981757906032726e-06,
"learning_rate": 1.971606551378334e-05,
"loss": 0.0743,
"step": 16750
},
{
"epoch": 0.014495029412830518,
"grad_norm": 0.9886574745178223,
"learning_rate": 1.9715202491028878e-05,
"loss": 0.0285,
"step": 16800
},
{
"epoch": 0.014538169381321084,
"grad_norm": 36.159725189208984,
"learning_rate": 1.971433946827442e-05,
"loss": 0.0159,
"step": 16850
},
{
"epoch": 0.01458130934981165,
"grad_norm": 4.837416648864746,
"learning_rate": 1.9713476445519965e-05,
"loss": 0.0125,
"step": 16900
},
{
"epoch": 0.014624449318302217,
"grad_norm": 4.346982677816413e-06,
"learning_rate": 1.9712613422765505e-05,
"loss": 0.0152,
"step": 16950
},
{
"epoch": 0.014667589286792786,
"grad_norm": 8.429530962139609e-10,
"learning_rate": 1.9711750400011048e-05,
"loss": 0.0035,
"step": 17000
},
{
"epoch": 0.014710729255283352,
"grad_norm": 1.2465453437471297e-05,
"learning_rate": 1.971088737725659e-05,
"loss": 0.0067,
"step": 17050
},
{
"epoch": 0.014753869223773919,
"grad_norm": 6.187327699080925e-07,
"learning_rate": 1.971002435450213e-05,
"loss": 0.0159,
"step": 17100
},
{
"epoch": 0.014797009192264485,
"grad_norm": 0.316834419965744,
"learning_rate": 1.9709161331747675e-05,
"loss": 0.0063,
"step": 17150
},
{
"epoch": 0.014840149160755054,
"grad_norm": 0.00014671437384095043,
"learning_rate": 1.9708298308993218e-05,
"loss": 0.0784,
"step": 17200
},
{
"epoch": 0.01488328912924562,
"grad_norm": 1.0954934737128497e-08,
"learning_rate": 1.9707435286238758e-05,
"loss": 0.0238,
"step": 17250
},
{
"epoch": 0.014926429097736187,
"grad_norm": 0.5361968278884888,
"learning_rate": 1.97065722634843e-05,
"loss": 0.0195,
"step": 17300
},
{
"epoch": 0.014969569066226754,
"grad_norm": 3.5330817699432373,
"learning_rate": 1.970570924072984e-05,
"loss": 0.0445,
"step": 17350
},
{
"epoch": 0.015012709034717322,
"grad_norm": 0.0001147388611570932,
"learning_rate": 1.9704846217975385e-05,
"loss": 0.0534,
"step": 17400
},
{
"epoch": 0.015055849003207888,
"grad_norm": 1.4025573237541611e-11,
"learning_rate": 1.9703983195220925e-05,
"loss": 0.0443,
"step": 17450
},
{
"epoch": 0.015098988971698455,
"grad_norm": 0.0013008522801101208,
"learning_rate": 1.9703120172466468e-05,
"loss": 0.0301,
"step": 17500
},
{
"epoch": 0.015142128940189022,
"grad_norm": 4.471134662628174,
"learning_rate": 1.970225714971201e-05,
"loss": 0.0301,
"step": 17550
},
{
"epoch": 0.01518526890867959,
"grad_norm": 6.183355708344607e-06,
"learning_rate": 1.9701394126957555e-05,
"loss": 0.0369,
"step": 17600
},
{
"epoch": 0.015228408877170157,
"grad_norm": 7.665110751986504e-05,
"learning_rate": 1.9700531104203095e-05,
"loss": 0.0056,
"step": 17650
},
{
"epoch": 0.015271548845660723,
"grad_norm": 2.106353521347046,
"learning_rate": 1.9699668081448638e-05,
"loss": 0.0272,
"step": 17700
},
{
"epoch": 0.01531468881415129,
"grad_norm": 3.855154488974222e-07,
"learning_rate": 1.9698805058694178e-05,
"loss": 0.0352,
"step": 17750
},
{
"epoch": 0.015357828782641858,
"grad_norm": 17.341279983520508,
"learning_rate": 1.969794203593972e-05,
"loss": 0.0465,
"step": 17800
},
{
"epoch": 0.015400968751132425,
"grad_norm": 9.402146679349244e-05,
"learning_rate": 1.9697079013185265e-05,
"loss": 0.0183,
"step": 17850
},
{
"epoch": 0.015444108719622991,
"grad_norm": 0.00015307770809158683,
"learning_rate": 1.9696215990430805e-05,
"loss": 0.0291,
"step": 17900
},
{
"epoch": 0.015487248688113558,
"grad_norm": 3.735563609552628e-07,
"learning_rate": 1.9695352967676348e-05,
"loss": 0.0019,
"step": 17950
},
{
"epoch": 0.015530388656604124,
"grad_norm": 5.729863187298179e-05,
"learning_rate": 1.9694489944921888e-05,
"loss": 0.0045,
"step": 18000
},
{
"epoch": 0.015573528625094693,
"grad_norm": 2.0717274562542798e-09,
"learning_rate": 1.969362692216743e-05,
"loss": 0.018,
"step": 18050
},
{
"epoch": 0.01561666859358526,
"grad_norm": 12.531591415405273,
"learning_rate": 1.9692763899412974e-05,
"loss": 0.0242,
"step": 18100
},
{
"epoch": 0.015659808562075828,
"grad_norm": 3.573931508071837e-06,
"learning_rate": 1.9691900876658518e-05,
"loss": 0.0556,
"step": 18150
},
{
"epoch": 0.015702948530566394,
"grad_norm": 1.851037545463896e-08,
"learning_rate": 1.9691037853904058e-05,
"loss": 0.0141,
"step": 18200
},
{
"epoch": 0.01574608849905696,
"grad_norm": 3.601686694310047e-05,
"learning_rate": 1.96901748311496e-05,
"loss": 0.0541,
"step": 18250
},
{
"epoch": 0.015789228467547527,
"grad_norm": 0.05700366199016571,
"learning_rate": 1.968931180839514e-05,
"loss": 0.0286,
"step": 18300
},
{
"epoch": 0.015832368436038094,
"grad_norm": 3.566603901106191e-09,
"learning_rate": 1.9688448785640684e-05,
"loss": 0.0091,
"step": 18350
},
{
"epoch": 0.01587550840452866,
"grad_norm": 0.00013142921670805663,
"learning_rate": 1.9687585762886224e-05,
"loss": 0.0254,
"step": 18400
},
{
"epoch": 0.015918648373019227,
"grad_norm": 20.01519775390625,
"learning_rate": 1.9686722740131768e-05,
"loss": 0.0771,
"step": 18450
},
{
"epoch": 0.015961788341509794,
"grad_norm": 0.1688498556613922,
"learning_rate": 1.9685859717377308e-05,
"loss": 0.0183,
"step": 18500
},
{
"epoch": 0.016004928310000364,
"grad_norm": 0.030780350789427757,
"learning_rate": 1.968499669462285e-05,
"loss": 0.0251,
"step": 18550
},
{
"epoch": 0.01604806827849093,
"grad_norm": 0.002585780341178179,
"learning_rate": 1.9684133671868394e-05,
"loss": 0.0718,
"step": 18600
},
{
"epoch": 0.016091208246981497,
"grad_norm": 4.36324262409471e-05,
"learning_rate": 1.9683270649113938e-05,
"loss": 0.0203,
"step": 18650
},
{
"epoch": 0.016134348215472064,
"grad_norm": 1.3234290463515208e-07,
"learning_rate": 1.9682407626359478e-05,
"loss": 0.0136,
"step": 18700
},
{
"epoch": 0.01617748818396263,
"grad_norm": 1.555231143868241e-08,
"learning_rate": 1.968154460360502e-05,
"loss": 0.037,
"step": 18750
},
{
"epoch": 0.016220628152453197,
"grad_norm": 2.4578237116656965e-06,
"learning_rate": 1.968068158085056e-05,
"loss": 0.0045,
"step": 18800
},
{
"epoch": 0.016263768120943763,
"grad_norm": 0.009525042027235031,
"learning_rate": 1.9679818558096104e-05,
"loss": 0.0084,
"step": 18850
},
{
"epoch": 0.01630690808943433,
"grad_norm": 22.186767578125,
"learning_rate": 1.9678955535341647e-05,
"loss": 0.0316,
"step": 18900
},
{
"epoch": 0.0163500480579249,
"grad_norm": 6.056162419554312e-06,
"learning_rate": 1.9678092512587187e-05,
"loss": 0.0085,
"step": 18950
},
{
"epoch": 0.016393188026415467,
"grad_norm": 1.4418605198684986e-09,
"learning_rate": 1.967722948983273e-05,
"loss": 0.0181,
"step": 19000
},
{
"epoch": 0.016436327994906033,
"grad_norm": 5.71908742585947e-09,
"learning_rate": 1.967636646707827e-05,
"loss": 0.0073,
"step": 19050
},
{
"epoch": 0.0164794679633966,
"grad_norm": 6.593646517671914e-09,
"learning_rate": 1.9675503444323817e-05,
"loss": 0.0685,
"step": 19100
},
{
"epoch": 0.016522607931887166,
"grad_norm": 2.7447922229766846,
"learning_rate": 1.9674640421569357e-05,
"loss": 0.0368,
"step": 19150
},
{
"epoch": 0.016565747900377733,
"grad_norm": 3.157795136488062e-09,
"learning_rate": 1.96737773988149e-05,
"loss": 0.0653,
"step": 19200
},
{
"epoch": 0.0166088878688683,
"grad_norm": 6.913658580742776e-06,
"learning_rate": 1.967291437606044e-05,
"loss": 0.053,
"step": 19250
},
{
"epoch": 0.016652027837358866,
"grad_norm": 3.1019378639030037e-06,
"learning_rate": 1.9672051353305984e-05,
"loss": 0.0392,
"step": 19300
},
{
"epoch": 0.016695167805849436,
"grad_norm": 0.00028862591716460884,
"learning_rate": 1.9671188330551524e-05,
"loss": 0.0031,
"step": 19350
},
{
"epoch": 0.016738307774340003,
"grad_norm": 2.975168058583222e-07,
"learning_rate": 1.9670325307797067e-05,
"loss": 0.0142,
"step": 19400
},
{
"epoch": 0.01678144774283057,
"grad_norm": 6.055047379049938e-07,
"learning_rate": 1.9669462285042607e-05,
"loss": 0.0294,
"step": 19450
},
{
"epoch": 0.016824587711321136,
"grad_norm": 0.0006536454311572015,
"learning_rate": 1.966859926228815e-05,
"loss": 0.0411,
"step": 19500
},
{
"epoch": 0.016867727679811702,
"grad_norm": 0.0043412791565060616,
"learning_rate": 1.9667736239533694e-05,
"loss": 0.0477,
"step": 19550
},
{
"epoch": 0.01691086764830227,
"grad_norm": 0.08467547595500946,
"learning_rate": 1.9666873216779234e-05,
"loss": 0.0041,
"step": 19600
},
{
"epoch": 0.016954007616792836,
"grad_norm": 5.161958824828616e-07,
"learning_rate": 1.9666010194024777e-05,
"loss": 0.0828,
"step": 19650
},
{
"epoch": 0.016997147585283402,
"grad_norm": 0.03497151657938957,
"learning_rate": 1.966514717127032e-05,
"loss": 0.0095,
"step": 19700
},
{
"epoch": 0.017040287553773972,
"grad_norm": 0.0004174104833509773,
"learning_rate": 1.966428414851586e-05,
"loss": 0.0206,
"step": 19750
},
{
"epoch": 0.01708342752226454,
"grad_norm": 0.00030457283719442785,
"learning_rate": 1.9663421125761404e-05,
"loss": 0.0092,
"step": 19800
},
{
"epoch": 0.017126567490755105,
"grad_norm": 0.0026671765372157097,
"learning_rate": 1.9662558103006947e-05,
"loss": 0.0083,
"step": 19850
},
{
"epoch": 0.017169707459245672,
"grad_norm": 20.56145668029785,
"learning_rate": 1.9661695080252487e-05,
"loss": 0.0259,
"step": 19900
},
{
"epoch": 0.01721284742773624,
"grad_norm": 2.7404727006796747e-05,
"learning_rate": 1.966083205749803e-05,
"loss": 0.0165,
"step": 19950
},
{
"epoch": 0.017255987396226805,
"grad_norm": 4.9371454480251487e-08,
"learning_rate": 1.965996903474357e-05,
"loss": 0.0121,
"step": 20000
},
{
"epoch": 0.017299127364717372,
"grad_norm": 0.011251527816057205,
"learning_rate": 1.9659106011989114e-05,
"loss": 0.0033,
"step": 20050
},
{
"epoch": 0.01734226733320794,
"grad_norm": 3.3487244088803436e-09,
"learning_rate": 1.9658242989234654e-05,
"loss": 0.0319,
"step": 20100
},
{
"epoch": 0.01738540730169851,
"grad_norm": 0.00034460489405319095,
"learning_rate": 1.9657379966480197e-05,
"loss": 0.0215,
"step": 20150
},
{
"epoch": 0.017428547270189075,
"grad_norm": 8.861123319547914e-07,
"learning_rate": 1.965651694372574e-05,
"loss": 0.0507,
"step": 20200
},
{
"epoch": 0.01747168723867964,
"grad_norm": 0.0008550824131816626,
"learning_rate": 1.9655653920971284e-05,
"loss": 0.0499,
"step": 20250
},
{
"epoch": 0.017514827207170208,
"grad_norm": 1.3901036766128527e-07,
"learning_rate": 1.9654790898216824e-05,
"loss": 0.0462,
"step": 20300
},
{
"epoch": 0.017557967175660775,
"grad_norm": 0.06260337680578232,
"learning_rate": 1.9653927875462367e-05,
"loss": 0.0499,
"step": 20350
},
{
"epoch": 0.01760110714415134,
"grad_norm": 1.0717659648662448e-07,
"learning_rate": 1.9653064852707907e-05,
"loss": 0.0155,
"step": 20400
},
{
"epoch": 0.017644247112641908,
"grad_norm": 7.46982475874347e-09,
"learning_rate": 1.965220182995345e-05,
"loss": 0.0605,
"step": 20450
},
{
"epoch": 0.017687387081132475,
"grad_norm": 2.092070280923508e-05,
"learning_rate": 1.9651338807198994e-05,
"loss": 0.002,
"step": 20500
},
{
"epoch": 0.017730527049623045,
"grad_norm": 8.422440259892028e-06,
"learning_rate": 1.9650475784444533e-05,
"loss": 0.0041,
"step": 20550
},
{
"epoch": 0.01777366701811361,
"grad_norm": 0.2332431972026825,
"learning_rate": 1.9649612761690077e-05,
"loss": 0.004,
"step": 20600
},
{
"epoch": 0.017816806986604178,
"grad_norm": 4.870547076762932e-09,
"learning_rate": 1.9648749738935617e-05,
"loss": 0.0452,
"step": 20650
},
{
"epoch": 0.017859946955094744,
"grad_norm": 5.206494506637682e-07,
"learning_rate": 1.964788671618116e-05,
"loss": 0.0045,
"step": 20700
},
{
"epoch": 0.01790308692358531,
"grad_norm": 2.1451814973261207e-06,
"learning_rate": 1.9647023693426703e-05,
"loss": 0.006,
"step": 20750
},
{
"epoch": 0.017946226892075878,
"grad_norm": 8.108095244097058e-06,
"learning_rate": 1.9646160670672247e-05,
"loss": 0.0345,
"step": 20800
},
{
"epoch": 0.017989366860566444,
"grad_norm": 0.025016743689775467,
"learning_rate": 1.9645297647917787e-05,
"loss": 0.0532,
"step": 20850
},
{
"epoch": 0.01803250682905701,
"grad_norm": 5.400533609645208e-06,
"learning_rate": 1.964443462516333e-05,
"loss": 0.0021,
"step": 20900
},
{
"epoch": 0.018075646797547577,
"grad_norm": 2.8619383556360845e-06,
"learning_rate": 1.964357160240887e-05,
"loss": 0.0362,
"step": 20950
},
{
"epoch": 0.018118786766038147,
"grad_norm": 3.4743165969848633,
"learning_rate": 1.9642708579654413e-05,
"loss": 0.0136,
"step": 21000
},
{
"epoch": 0.018161926734528714,
"grad_norm": 59.8224983215332,
"learning_rate": 1.9641845556899953e-05,
"loss": 0.014,
"step": 21050
},
{
"epoch": 0.01820506670301928,
"grad_norm": 8.128851186484098e-05,
"learning_rate": 1.9640982534145497e-05,
"loss": 0.0295,
"step": 21100
},
{
"epoch": 0.018248206671509847,
"grad_norm": 3.548375752870925e-05,
"learning_rate": 1.9640119511391037e-05,
"loss": 0.0286,
"step": 21150
},
{
"epoch": 0.018291346640000414,
"grad_norm": 0.0468142107129097,
"learning_rate": 1.963925648863658e-05,
"loss": 0.0525,
"step": 21200
},
{
"epoch": 0.01833448660849098,
"grad_norm": 4.863815320277354e-06,
"learning_rate": 1.9638393465882123e-05,
"loss": 0.0063,
"step": 21250
},
{
"epoch": 0.018377626576981547,
"grad_norm": 9.059208938566599e-10,
"learning_rate": 1.9637530443127667e-05,
"loss": 0.0217,
"step": 21300
},
{
"epoch": 0.018420766545472114,
"grad_norm": 0.9207327365875244,
"learning_rate": 1.9636667420373206e-05,
"loss": 0.0054,
"step": 21350
},
{
"epoch": 0.018463906513962684,
"grad_norm": 0.00036540269502438605,
"learning_rate": 1.963580439761875e-05,
"loss": 0.0188,
"step": 21400
},
{
"epoch": 0.01850704648245325,
"grad_norm": 0.00022348039783537388,
"learning_rate": 1.963494137486429e-05,
"loss": 0.0025,
"step": 21450
},
{
"epoch": 0.018550186450943817,
"grad_norm": 0.27767083048820496,
"learning_rate": 1.9634078352109833e-05,
"loss": 0.018,
"step": 21500
},
{
"epoch": 0.018593326419434383,
"grad_norm": 0.022822152823209763,
"learning_rate": 1.9633215329355376e-05,
"loss": 0.0569,
"step": 21550
},
{
"epoch": 0.01863646638792495,
"grad_norm": 0.00016692353528924286,
"learning_rate": 1.9632352306600916e-05,
"loss": 0.0227,
"step": 21600
},
{
"epoch": 0.018679606356415517,
"grad_norm": 0.5533714890480042,
"learning_rate": 1.963148928384646e-05,
"loss": 0.0112,
"step": 21650
},
{
"epoch": 0.018722746324906083,
"grad_norm": 0.030804995447397232,
"learning_rate": 1.9630626261092e-05,
"loss": 0.0083,
"step": 21700
},
{
"epoch": 0.01876588629339665,
"grad_norm": 1.79214639501879e-05,
"learning_rate": 1.9629763238337546e-05,
"loss": 0.0079,
"step": 21750
},
{
"epoch": 0.01880902626188722,
"grad_norm": 1.6093619492618672e-10,
"learning_rate": 1.9628900215583086e-05,
"loss": 0.0156,
"step": 21800
},
{
"epoch": 0.018852166230377786,
"grad_norm": 0.005034497939050198,
"learning_rate": 1.962803719282863e-05,
"loss": 0.0623,
"step": 21850
},
{
"epoch": 0.018895306198868353,
"grad_norm": 0.017401648685336113,
"learning_rate": 1.962717417007417e-05,
"loss": 0.0258,
"step": 21900
},
{
"epoch": 0.01893844616735892,
"grad_norm": 2.2319347858428955,
"learning_rate": 1.9626311147319713e-05,
"loss": 0.0201,
"step": 21950
},
{
"epoch": 0.018981586135849486,
"grad_norm": 8.550871825718787e-06,
"learning_rate": 1.9625448124565253e-05,
"loss": 0.009,
"step": 22000
},
{
"epoch": 0.019024726104340053,
"grad_norm": 1.8346406704949914e-06,
"learning_rate": 1.9624585101810796e-05,
"loss": 0.0806,
"step": 22050
},
{
"epoch": 0.01906786607283062,
"grad_norm": 12.84133243560791,
"learning_rate": 1.9623722079056336e-05,
"loss": 0.0097,
"step": 22100
},
{
"epoch": 0.019111006041321186,
"grad_norm": 9.22921472579219e-09,
"learning_rate": 1.962285905630188e-05,
"loss": 0.0185,
"step": 22150
},
{
"epoch": 0.019154146009811756,
"grad_norm": 1.6999269723892212,
"learning_rate": 1.9621996033547423e-05,
"loss": 0.0145,
"step": 22200
},
{
"epoch": 0.019197285978302323,
"grad_norm": 2.9266016483306885,
"learning_rate": 1.9621133010792963e-05,
"loss": 0.0214,
"step": 22250
},
{
"epoch": 0.01924042594679289,
"grad_norm": 0.13319005072116852,
"learning_rate": 1.9620269988038506e-05,
"loss": 0.0542,
"step": 22300
},
{
"epoch": 0.019283565915283456,
"grad_norm": 1.2659254934987985e-05,
"learning_rate": 1.961940696528405e-05,
"loss": 0.0059,
"step": 22350
},
{
"epoch": 0.019326705883774022,
"grad_norm": 2.33125811064383e-05,
"learning_rate": 1.961854394252959e-05,
"loss": 0.0414,
"step": 22400
},
{
"epoch": 0.01936984585226459,
"grad_norm": 0.008146941661834717,
"learning_rate": 1.9617680919775133e-05,
"loss": 0.0129,
"step": 22450
},
{
"epoch": 0.019412985820755155,
"grad_norm": 4.2442545236554e-05,
"learning_rate": 1.9616817897020676e-05,
"loss": 0.0237,
"step": 22500
},
{
"epoch": 0.019456125789245722,
"grad_norm": 6.483288217395966e-08,
"learning_rate": 1.9615954874266216e-05,
"loss": 0.0036,
"step": 22550
},
{
"epoch": 0.019499265757736292,
"grad_norm": 0.025942707434296608,
"learning_rate": 1.961509185151176e-05,
"loss": 0.0233,
"step": 22600
},
{
"epoch": 0.01954240572622686,
"grad_norm": 0.004933039657771587,
"learning_rate": 1.96142288287573e-05,
"loss": 0.0279,
"step": 22650
},
{
"epoch": 0.019585545694717425,
"grad_norm": 9.285894102262215e-12,
"learning_rate": 1.9613365806002843e-05,
"loss": 0.0137,
"step": 22700
},
{
"epoch": 0.019628685663207992,
"grad_norm": 17.506160736083984,
"learning_rate": 1.9612502783248383e-05,
"loss": 0.0106,
"step": 22750
},
{
"epoch": 0.01967182563169856,
"grad_norm": 1.2982255270799214e-07,
"learning_rate": 1.9611639760493926e-05,
"loss": 0.0066,
"step": 22800
},
{
"epoch": 0.019714965600189125,
"grad_norm": 3.575518903176089e-08,
"learning_rate": 1.961077673773947e-05,
"loss": 0.0187,
"step": 22850
},
{
"epoch": 0.01975810556867969,
"grad_norm": 0.04352926090359688,
"learning_rate": 1.9609913714985013e-05,
"loss": 0.0127,
"step": 22900
},
{
"epoch": 0.019801245537170258,
"grad_norm": 15.828106880187988,
"learning_rate": 1.9609050692230552e-05,
"loss": 0.0234,
"step": 22950
},
{
"epoch": 0.01984438550566083,
"grad_norm": 2.8101124982526926e-08,
"learning_rate": 1.9608187669476096e-05,
"loss": 0.0406,
"step": 23000
},
{
"epoch": 0.019887525474151395,
"grad_norm": 1.5754636478959583e-05,
"learning_rate": 1.9607324646721636e-05,
"loss": 0.013,
"step": 23050
},
{
"epoch": 0.01993066544264196,
"grad_norm": 0.00016132810560520738,
"learning_rate": 1.960646162396718e-05,
"loss": 0.0661,
"step": 23100
},
{
"epoch": 0.019973805411132528,
"grad_norm": 0.009830374270677567,
"learning_rate": 1.9605598601212722e-05,
"loss": 0.0001,
"step": 23150
},
{
"epoch": 0.020016945379623095,
"grad_norm": 1.5961271415676492e-08,
"learning_rate": 1.9604735578458262e-05,
"loss": 0.0001,
"step": 23200
},
{
"epoch": 0.02006008534811366,
"grad_norm": 0.6032620668411255,
"learning_rate": 1.9603872555703806e-05,
"loss": 0.057,
"step": 23250
},
{
"epoch": 0.020103225316604228,
"grad_norm": 0.0007053284207358956,
"learning_rate": 1.9603009532949346e-05,
"loss": 0.0328,
"step": 23300
},
{
"epoch": 0.020146365285094794,
"grad_norm": 0.00022471090778708458,
"learning_rate": 1.960214651019489e-05,
"loss": 0.0176,
"step": 23350
},
{
"epoch": 0.020189505253585364,
"grad_norm": 3.784521595662227e-06,
"learning_rate": 1.9601283487440432e-05,
"loss": 0.0342,
"step": 23400
},
{
"epoch": 0.02023264522207593,
"grad_norm": 0.0002926274319179356,
"learning_rate": 1.9600420464685976e-05,
"loss": 0.0622,
"step": 23450
},
{
"epoch": 0.020275785190566498,
"grad_norm": 0.0005665869684889913,
"learning_rate": 1.9599557441931516e-05,
"loss": 0.0319,
"step": 23500
},
{
"epoch": 0.020318925159057064,
"grad_norm": 0.0020943868439644575,
"learning_rate": 1.959869441917706e-05,
"loss": 0.0437,
"step": 23550
},
{
"epoch": 0.02036206512754763,
"grad_norm": 0.007852623239159584,
"learning_rate": 1.95978313964226e-05,
"loss": 0.0002,
"step": 23600
},
{
"epoch": 0.020405205096038197,
"grad_norm": 1.9628392457962036,
"learning_rate": 1.9596968373668142e-05,
"loss": 0.006,
"step": 23650
},
{
"epoch": 0.020448345064528764,
"grad_norm": 2.0241428533296357e-09,
"learning_rate": 1.9596105350913682e-05,
"loss": 0.0491,
"step": 23700
},
{
"epoch": 0.02049148503301933,
"grad_norm": 1.5093628569218254e-09,
"learning_rate": 1.9595242328159226e-05,
"loss": 0.0218,
"step": 23750
},
{
"epoch": 0.020534625001509897,
"grad_norm": 0.013457014225423336,
"learning_rate": 1.9594379305404765e-05,
"loss": 0.0246,
"step": 23800
},
{
"epoch": 0.020577764970000467,
"grad_norm": 2.7149107495461067e-07,
"learning_rate": 1.959351628265031e-05,
"loss": 0.0309,
"step": 23850
},
{
"epoch": 0.020620904938491034,
"grad_norm": 2.928385534062272e-09,
"learning_rate": 1.9592653259895852e-05,
"loss": 0.0243,
"step": 23900
},
{
"epoch": 0.0206640449069816,
"grad_norm": 0.0007422782364301383,
"learning_rate": 1.9591790237141395e-05,
"loss": 0.0483,
"step": 23950
},
{
"epoch": 0.020707184875472167,
"grad_norm": 3.9503233892901335e-06,
"learning_rate": 1.9590927214386935e-05,
"loss": 0.0281,
"step": 24000
},
{
"epoch": 0.020750324843962734,
"grad_norm": 0.07909461110830307,
"learning_rate": 1.959006419163248e-05,
"loss": 0.0137,
"step": 24050
},
{
"epoch": 0.0207934648124533,
"grad_norm": 1.3648401853139092e-10,
"learning_rate": 1.958920116887802e-05,
"loss": 0.046,
"step": 24100
},
{
"epoch": 0.020836604780943867,
"grad_norm": 1.791205619383618e-07,
"learning_rate": 1.9588338146123562e-05,
"loss": 0.0303,
"step": 24150
},
{
"epoch": 0.020879744749434433,
"grad_norm": 3.758560573885461e-09,
"learning_rate": 1.9587475123369105e-05,
"loss": 0.0029,
"step": 24200
},
{
"epoch": 0.020922884717925003,
"grad_norm": 2.0997137362144258e-10,
"learning_rate": 1.9586612100614645e-05,
"loss": 0.0431,
"step": 24250
},
{
"epoch": 0.02096602468641557,
"grad_norm": 4.752119064331055,
"learning_rate": 1.958574907786019e-05,
"loss": 0.0253,
"step": 24300
},
{
"epoch": 0.021009164654906137,
"grad_norm": 0.004993764217942953,
"learning_rate": 1.958488605510573e-05,
"loss": 0.0292,
"step": 24350
},
{
"epoch": 0.021052304623396703,
"grad_norm": 1.2806524729569446e-09,
"learning_rate": 1.9584023032351275e-05,
"loss": 0.0538,
"step": 24400
},
{
"epoch": 0.02109544459188727,
"grad_norm": 6.973591126779866e-08,
"learning_rate": 1.9583160009596815e-05,
"loss": 0.0272,
"step": 24450
},
{
"epoch": 0.021138584560377836,
"grad_norm": 0.042537808418273926,
"learning_rate": 1.958229698684236e-05,
"loss": 0.046,
"step": 24500
},
{
"epoch": 0.021181724528868403,
"grad_norm": 0.0006602337816730142,
"learning_rate": 1.95814339640879e-05,
"loss": 0.024,
"step": 24550
},
{
"epoch": 0.02122486449735897,
"grad_norm": 22.432666778564453,
"learning_rate": 1.9580570941333442e-05,
"loss": 0.0484,
"step": 24600
},
{
"epoch": 0.02126800446584954,
"grad_norm": 0.024881912395358086,
"learning_rate": 1.9579707918578982e-05,
"loss": 0.0061,
"step": 24650
},
{
"epoch": 0.021311144434340106,
"grad_norm": 9.876566764432937e-06,
"learning_rate": 1.9578844895824525e-05,
"loss": 0.033,
"step": 24700
},
{
"epoch": 0.021354284402830673,
"grad_norm": 1.04228820418939e-05,
"learning_rate": 1.9577981873070065e-05,
"loss": 0.0246,
"step": 24750
},
{
"epoch": 0.02139742437132124,
"grad_norm": 4.033939262626518e-07,
"learning_rate": 1.957711885031561e-05,
"loss": 0.0273,
"step": 24800
},
{
"epoch": 0.021440564339811806,
"grad_norm": 1.8699473002925515e-05,
"learning_rate": 1.9576255827561152e-05,
"loss": 0.0404,
"step": 24850
},
{
"epoch": 0.021483704308302373,
"grad_norm": 7.583350480899753e-08,
"learning_rate": 1.957539280480669e-05,
"loss": 0.0265,
"step": 24900
},
{
"epoch": 0.02152684427679294,
"grad_norm": 0.02612815983593464,
"learning_rate": 1.9574529782052235e-05,
"loss": 0.0219,
"step": 24950
},
{
"epoch": 0.021569984245283506,
"grad_norm": 5.127071176502795e-07,
"learning_rate": 1.957366675929778e-05,
"loss": 0.0609,
"step": 25000
},
{
"epoch": 0.021613124213774076,
"grad_norm": 0.00036468004691414535,
"learning_rate": 1.957280373654332e-05,
"loss": 0.0173,
"step": 25050
},
{
"epoch": 0.021656264182264642,
"grad_norm": 4.805618573300308e-06,
"learning_rate": 1.957194071378886e-05,
"loss": 0.0478,
"step": 25100
},
{
"epoch": 0.02169940415075521,
"grad_norm": 0.003498099045827985,
"learning_rate": 1.9571077691034405e-05,
"loss": 0.0422,
"step": 25150
},
{
"epoch": 0.021742544119245776,
"grad_norm": 3.893982466252055e-06,
"learning_rate": 1.9570214668279945e-05,
"loss": 0.024,
"step": 25200
},
{
"epoch": 0.021785684087736342,
"grad_norm": 6.174719402451956e-08,
"learning_rate": 1.9569351645525488e-05,
"loss": 0.0448,
"step": 25250
},
{
"epoch": 0.02182882405622691,
"grad_norm": 0.8544023633003235,
"learning_rate": 1.9568488622771028e-05,
"loss": 0.009,
"step": 25300
},
{
"epoch": 0.021871964024717475,
"grad_norm": 1.8829781822660152e-07,
"learning_rate": 1.956762560001657e-05,
"loss": 0.0059,
"step": 25350
},
{
"epoch": 0.021915103993208042,
"grad_norm": 1.7753800420905463e-06,
"learning_rate": 1.956676257726211e-05,
"loss": 0.0614,
"step": 25400
},
{
"epoch": 0.021958243961698612,
"grad_norm": 2.652618924514627e-08,
"learning_rate": 1.9565899554507655e-05,
"loss": 0.0184,
"step": 25450
},
{
"epoch": 0.02200138393018918,
"grad_norm": 0.33340388536453247,
"learning_rate": 1.9565036531753198e-05,
"loss": 0.0166,
"step": 25500
},
{
"epoch": 0.022044523898679745,
"grad_norm": 0.40569502115249634,
"learning_rate": 1.956417350899874e-05,
"loss": 0.0179,
"step": 25550
},
{
"epoch": 0.02208766386717031,
"grad_norm": 0.00011573725350899622,
"learning_rate": 1.956331048624428e-05,
"loss": 0.0006,
"step": 25600
},
{
"epoch": 0.02213080383566088,
"grad_norm": 2.554327238613041e-06,
"learning_rate": 1.9562447463489825e-05,
"loss": 0.0402,
"step": 25650
},
{
"epoch": 0.022173943804151445,
"grad_norm": 8.304319010221661e-08,
"learning_rate": 1.9561584440735365e-05,
"loss": 0.0363,
"step": 25700
},
{
"epoch": 0.02221708377264201,
"grad_norm": 1.8539299873054915e-08,
"learning_rate": 1.9560721417980908e-05,
"loss": 0.0042,
"step": 25750
},
{
"epoch": 0.022260223741132578,
"grad_norm": 0.043552886694669724,
"learning_rate": 1.955985839522645e-05,
"loss": 0.0358,
"step": 25800
},
{
"epoch": 0.022303363709623148,
"grad_norm": 0.00025480103795416653,
"learning_rate": 1.955899537247199e-05,
"loss": 0.0349,
"step": 25850
},
{
"epoch": 0.022346503678113715,
"grad_norm": 0.0006263578543439507,
"learning_rate": 1.9558132349717535e-05,
"loss": 0.0184,
"step": 25900
},
{
"epoch": 0.02238964364660428,
"grad_norm": 2.677586793899536,
"learning_rate": 1.9557269326963075e-05,
"loss": 0.0667,
"step": 25950
},
{
"epoch": 0.022432783615094848,
"grad_norm": 0.6284056305885315,
"learning_rate": 1.9556406304208618e-05,
"loss": 0.0061,
"step": 26000
},
{
"epoch": 0.022475923583585414,
"grad_norm": 0.01573588326573372,
"learning_rate": 1.955554328145416e-05,
"loss": 0.0515,
"step": 26050
},
{
"epoch": 0.02251906355207598,
"grad_norm": 9.318134289060254e-06,
"learning_rate": 1.9554680258699705e-05,
"loss": 0.0231,
"step": 26100
},
{
"epoch": 0.022562203520566548,
"grad_norm": 3.892751294642949e-07,
"learning_rate": 1.9553817235945245e-05,
"loss": 0.004,
"step": 26150
},
{
"epoch": 0.022605343489057114,
"grad_norm": 0.0010842111660167575,
"learning_rate": 1.9552954213190788e-05,
"loss": 0.0568,
"step": 26200
},
{
"epoch": 0.022648483457547684,
"grad_norm": 0.021115347743034363,
"learning_rate": 1.9552091190436328e-05,
"loss": 0.0711,
"step": 26250
},
{
"epoch": 0.02269162342603825,
"grad_norm": 0.07015379518270493,
"learning_rate": 1.955122816768187e-05,
"loss": 0.0305,
"step": 26300
},
{
"epoch": 0.022734763394528817,
"grad_norm": 3.8024263631086797e-05,
"learning_rate": 1.955036514492741e-05,
"loss": 0.0309,
"step": 26350
},
{
"epoch": 0.022777903363019384,
"grad_norm": 0.0043113697320222855,
"learning_rate": 1.9549502122172954e-05,
"loss": 0.0066,
"step": 26400
},
{
"epoch": 0.02282104333150995,
"grad_norm": 0.007588895037770271,
"learning_rate": 1.9548639099418494e-05,
"loss": 0.0242,
"step": 26450
},
{
"epoch": 0.022864183300000517,
"grad_norm": 1.8674474954605103,
"learning_rate": 1.9547776076664038e-05,
"loss": 0.0163,
"step": 26500
},
{
"epoch": 0.022907323268491084,
"grad_norm": 4.954452991485596,
"learning_rate": 1.954691305390958e-05,
"loss": 0.0368,
"step": 26550
},
{
"epoch": 0.02295046323698165,
"grad_norm": 0.0024081666488200426,
"learning_rate": 1.9546050031155124e-05,
"loss": 0.0255,
"step": 26600
},
{
"epoch": 0.02299360320547222,
"grad_norm": 0.4166341722011566,
"learning_rate": 1.9545187008400664e-05,
"loss": 0.0331,
"step": 26650
},
{
"epoch": 0.023036743173962787,
"grad_norm": 0.00036967426422052085,
"learning_rate": 1.9544323985646208e-05,
"loss": 0.0282,
"step": 26700
},
{
"epoch": 0.023079883142453354,
"grad_norm": 1.1294196688993452e-08,
"learning_rate": 1.954346096289175e-05,
"loss": 0.0293,
"step": 26750
},
{
"epoch": 0.02312302311094392,
"grad_norm": 24.33706283569336,
"learning_rate": 1.954259794013729e-05,
"loss": 0.0475,
"step": 26800
},
{
"epoch": 0.023166163079434487,
"grad_norm": 1.3493994366342577e-08,
"learning_rate": 1.9541734917382834e-05,
"loss": 0.0045,
"step": 26850
},
{
"epoch": 0.023209303047925053,
"grad_norm": 6.673410098301247e-05,
"learning_rate": 1.9540871894628374e-05,
"loss": 0.0059,
"step": 26900
},
{
"epoch": 0.02325244301641562,
"grad_norm": 0.0014361342182382941,
"learning_rate": 1.9540008871873918e-05,
"loss": 0.0002,
"step": 26950
},
{
"epoch": 0.023295582984906187,
"grad_norm": 3.2534658908843994,
"learning_rate": 1.9539145849119458e-05,
"loss": 0.0329,
"step": 27000
},
{
"epoch": 0.023338722953396753,
"grad_norm": 0.0029180857818573713,
"learning_rate": 1.9538282826365004e-05,
"loss": 0.0007,
"step": 27050
},
{
"epoch": 0.023381862921887323,
"grad_norm": 7.010048866271973,
"learning_rate": 1.9537419803610544e-05,
"loss": 0.0473,
"step": 27100
},
{
"epoch": 0.02342500289037789,
"grad_norm": 0.5129420757293701,
"learning_rate": 1.9536556780856088e-05,
"loss": 0.0312,
"step": 27150
},
{
"epoch": 0.023468142858868456,
"grad_norm": 0.008801298215985298,
"learning_rate": 1.9535693758101627e-05,
"loss": 0.0025,
"step": 27200
},
{
"epoch": 0.023511282827359023,
"grad_norm": 7.381456001986919e-10,
"learning_rate": 1.953483073534717e-05,
"loss": 0.0399,
"step": 27250
},
{
"epoch": 0.02355442279584959,
"grad_norm": 0.015433188527822495,
"learning_rate": 1.953396771259271e-05,
"loss": 0.0248,
"step": 27300
},
{
"epoch": 0.023597562764340156,
"grad_norm": 3.086728572845459,
"learning_rate": 1.9533104689838254e-05,
"loss": 0.0124,
"step": 27350
},
{
"epoch": 0.023640702732830723,
"grad_norm": 5.318460255532287e-11,
"learning_rate": 1.9532241667083794e-05,
"loss": 0.0259,
"step": 27400
},
{
"epoch": 0.02368384270132129,
"grad_norm": 0.0015008836053311825,
"learning_rate": 1.9531378644329337e-05,
"loss": 0.0128,
"step": 27450
},
{
"epoch": 0.02372698266981186,
"grad_norm": 4.6280136302812025e-05,
"learning_rate": 1.953051562157488e-05,
"loss": 0.0005,
"step": 27500
},
{
"epoch": 0.023770122638302426,
"grad_norm": 4.795760560227791e-06,
"learning_rate": 1.952965259882042e-05,
"loss": 0.0155,
"step": 27550
},
{
"epoch": 0.023813262606792993,
"grad_norm": 0.19684414565563202,
"learning_rate": 1.9528789576065964e-05,
"loss": 0.0042,
"step": 27600
},
{
"epoch": 0.02385640257528356,
"grad_norm": 1.0629539559658951e-07,
"learning_rate": 1.9527926553311507e-05,
"loss": 0.0097,
"step": 27650
},
{
"epoch": 0.023899542543774126,
"grad_norm": 1.161576043684498e-11,
"learning_rate": 1.9527063530557047e-05,
"loss": 0.0009,
"step": 27700
},
{
"epoch": 0.023942682512264692,
"grad_norm": 1.7004417318666754e-10,
"learning_rate": 1.952620050780259e-05,
"loss": 0.0324,
"step": 27750
},
{
"epoch": 0.02398582248075526,
"grad_norm": 3.243289393140003e-05,
"learning_rate": 1.9525337485048134e-05,
"loss": 0.0768,
"step": 27800
},
{
"epoch": 0.024028962449245826,
"grad_norm": 0.0029646342154592276,
"learning_rate": 1.9524474462293674e-05,
"loss": 0.0142,
"step": 27850
},
{
"epoch": 0.024072102417736396,
"grad_norm": 0.0012051378143951297,
"learning_rate": 1.9523611439539217e-05,
"loss": 0.0147,
"step": 27900
},
{
"epoch": 0.024115242386226962,
"grad_norm": 1.3464485164149664e-05,
"learning_rate": 1.9522748416784757e-05,
"loss": 0.0023,
"step": 27950
},
{
"epoch": 0.02415838235471753,
"grad_norm": 0.0002646016946528107,
"learning_rate": 1.95218853940303e-05,
"loss": 0.0098,
"step": 28000
},
{
"epoch": 0.024201522323208095,
"grad_norm": 6.006689727655612e-05,
"learning_rate": 1.952102237127584e-05,
"loss": 0.0052,
"step": 28050
},
{
"epoch": 0.024244662291698662,
"grad_norm": 31.13625717163086,
"learning_rate": 1.9520159348521384e-05,
"loss": 0.0298,
"step": 28100
},
{
"epoch": 0.02428780226018923,
"grad_norm": 0.00010399877646705136,
"learning_rate": 1.9519296325766927e-05,
"loss": 0.0512,
"step": 28150
},
{
"epoch": 0.024330942228679795,
"grad_norm": 9.850235755948233e-07,
"learning_rate": 1.951843330301247e-05,
"loss": 0.052,
"step": 28200
},
{
"epoch": 0.02437408219717036,
"grad_norm": 3.698731597978622e-05,
"learning_rate": 1.951757028025801e-05,
"loss": 0.0017,
"step": 28250
},
{
"epoch": 0.02441722216566093,
"grad_norm": 0.04309392347931862,
"learning_rate": 1.9516707257503554e-05,
"loss": 0.0385,
"step": 28300
},
{
"epoch": 0.0244603621341515,
"grad_norm": 9.081038115255069e-06,
"learning_rate": 1.9515844234749094e-05,
"loss": 0.0418,
"step": 28350
},
{
"epoch": 0.024503502102642065,
"grad_norm": 0.48385998606681824,
"learning_rate": 1.9514981211994637e-05,
"loss": 0.0207,
"step": 28400
},
{
"epoch": 0.02454664207113263,
"grad_norm": 1.9165490527939255e-07,
"learning_rate": 1.951411818924018e-05,
"loss": 0.0206,
"step": 28450
},
{
"epoch": 0.024589782039623198,
"grad_norm": 1.4679693776997738e-05,
"learning_rate": 1.951325516648572e-05,
"loss": 0.0346,
"step": 28500
},
{
"epoch": 0.024632922008113765,
"grad_norm": 0.11278124898672104,
"learning_rate": 1.9512392143731264e-05,
"loss": 0.0081,
"step": 28550
},
{
"epoch": 0.02467606197660433,
"grad_norm": 9.307966024607595e-07,
"learning_rate": 1.9511529120976804e-05,
"loss": 0.012,
"step": 28600
},
{
"epoch": 0.024719201945094898,
"grad_norm": 0.00027512782253324986,
"learning_rate": 1.9510666098222347e-05,
"loss": 0.0032,
"step": 28650
},
{
"epoch": 0.024762341913585468,
"grad_norm": 0.11172260344028473,
"learning_rate": 1.950980307546789e-05,
"loss": 0.0032,
"step": 28700
},
{
"epoch": 0.024805481882076034,
"grad_norm": 2.1106679923832417e-06,
"learning_rate": 1.9508940052713434e-05,
"loss": 0.0339,
"step": 28750
},
{
"epoch": 0.0248486218505666,
"grad_norm": 0.028800344094634056,
"learning_rate": 1.9508077029958973e-05,
"loss": 0.0278,
"step": 28800
},
{
"epoch": 0.024891761819057168,
"grad_norm": 1.757417521730531e-06,
"learning_rate": 1.9507214007204517e-05,
"loss": 0.0164,
"step": 28850
},
{
"epoch": 0.024934901787547734,
"grad_norm": 4.451398893934311e-08,
"learning_rate": 1.9506350984450057e-05,
"loss": 0.009,
"step": 28900
},
{
"epoch": 0.0249780417560383,
"grad_norm": 4.7023010552038613e-07,
"learning_rate": 1.95054879616956e-05,
"loss": 0.01,
"step": 28950
},
{
"epoch": 0.025021181724528867,
"grad_norm": 0.3000449538230896,
"learning_rate": 1.950462493894114e-05,
"loss": 0.0208,
"step": 29000
},
{
"epoch": 0.025064321693019434,
"grad_norm": 2.5534254746162333e-06,
"learning_rate": 1.9503761916186683e-05,
"loss": 0.0113,
"step": 29050
},
{
"epoch": 0.025107461661510004,
"grad_norm": 1.051041209620962e-07,
"learning_rate": 1.9502898893432223e-05,
"loss": 0.0286,
"step": 29100
},
{
"epoch": 0.02515060163000057,
"grad_norm": 0.11379561573266983,
"learning_rate": 1.9502035870677767e-05,
"loss": 0.0273,
"step": 29150
},
{
"epoch": 0.025193741598491137,
"grad_norm": 4.075488391208637e-09,
"learning_rate": 1.950117284792331e-05,
"loss": 0.001,
"step": 29200
},
{
"epoch": 0.025236881566981704,
"grad_norm": 6.561435283991557e-10,
"learning_rate": 1.9500309825168853e-05,
"loss": 0.0002,
"step": 29250
},
{
"epoch": 0.02528002153547227,
"grad_norm": 2.523017644882202,
"learning_rate": 1.9499446802414393e-05,
"loss": 0.0078,
"step": 29300
},
{
"epoch": 0.025323161503962837,
"grad_norm": 1.13604746729834e-05,
"learning_rate": 1.9498583779659937e-05,
"loss": 0.0009,
"step": 29350
},
{
"epoch": 0.025366301472453404,
"grad_norm": 0.00017209288489539176,
"learning_rate": 1.949772075690548e-05,
"loss": 0.0157,
"step": 29400
},
{
"epoch": 0.02540944144094397,
"grad_norm": 0.00011601659207371995,
"learning_rate": 1.949685773415102e-05,
"loss": 0.045,
"step": 29450
},
{
"epoch": 0.02545258140943454,
"grad_norm": 22.94985580444336,
"learning_rate": 1.9495994711396563e-05,
"loss": 0.0269,
"step": 29500
},
{
"epoch": 0.025495721377925107,
"grad_norm": 2.782198776918321e-11,
"learning_rate": 1.9495131688642103e-05,
"loss": 0.0025,
"step": 29550
},
{
"epoch": 0.025538861346415673,
"grad_norm": 2.155955371563323e-05,
"learning_rate": 1.9494268665887646e-05,
"loss": 0.0452,
"step": 29600
},
{
"epoch": 0.02558200131490624,
"grad_norm": 8.12989310361445e-06,
"learning_rate": 1.9493405643133186e-05,
"loss": 0.019,
"step": 29650
},
{
"epoch": 0.025625141283396807,
"grad_norm": 0.000956275500357151,
"learning_rate": 1.9492542620378733e-05,
"loss": 0.0122,
"step": 29700
},
{
"epoch": 0.025668281251887373,
"grad_norm": 3.834348838438473e-09,
"learning_rate": 1.9491679597624273e-05,
"loss": 0.0255,
"step": 29750
},
{
"epoch": 0.02571142122037794,
"grad_norm": 0.16173326969146729,
"learning_rate": 1.9490816574869816e-05,
"loss": 0.0382,
"step": 29800
},
{
"epoch": 0.025754561188868506,
"grad_norm": 0.0008912076009437442,
"learning_rate": 1.9489953552115356e-05,
"loss": 0.029,
"step": 29850
},
{
"epoch": 0.025797701157359073,
"grad_norm": 1.517190213462527e-07,
"learning_rate": 1.94890905293609e-05,
"loss": 0.0413,
"step": 29900
},
{
"epoch": 0.025840841125849643,
"grad_norm": 6.658311946239337e-08,
"learning_rate": 1.948822750660644e-05,
"loss": 0.0142,
"step": 29950
},
{
"epoch": 0.02588398109434021,
"grad_norm": 0.0003508856752887368,
"learning_rate": 1.9487364483851983e-05,
"loss": 0.0079,
"step": 30000
},
{
"epoch": 0.025927121062830776,
"grad_norm": 0.026366397738456726,
"learning_rate": 1.9486501461097523e-05,
"loss": 0.0381,
"step": 30050
},
{
"epoch": 0.025970261031321343,
"grad_norm": 5.6284894943237305,
"learning_rate": 1.9485638438343066e-05,
"loss": 0.0618,
"step": 30100
},
{
"epoch": 0.02601340099981191,
"grad_norm": 0.00824633240699768,
"learning_rate": 1.948477541558861e-05,
"loss": 0.0173,
"step": 30150
},
{
"epoch": 0.026056540968302476,
"grad_norm": 0.0007174229249358177,
"learning_rate": 1.948391239283415e-05,
"loss": 0.0199,
"step": 30200
},
{
"epoch": 0.026099680936793043,
"grad_norm": 0.02488381415605545,
"learning_rate": 1.9483049370079693e-05,
"loss": 0.0312,
"step": 30250
},
{
"epoch": 0.02614282090528361,
"grad_norm": 1.9344063997268677,
"learning_rate": 1.9482186347325236e-05,
"loss": 0.0357,
"step": 30300
},
{
"epoch": 0.02618596087377418,
"grad_norm": 3.485973834991455,
"learning_rate": 1.9481323324570776e-05,
"loss": 0.0266,
"step": 30350
},
{
"epoch": 0.026229100842264746,
"grad_norm": 6.07471008606808e-07,
"learning_rate": 1.948046030181632e-05,
"loss": 0.0308,
"step": 30400
},
{
"epoch": 0.026272240810755312,
"grad_norm": 7.532801760135044e-08,
"learning_rate": 1.9479597279061863e-05,
"loss": 0.047,
"step": 30450
},
{
"epoch": 0.02631538077924588,
"grad_norm": 0.0005202541360631585,
"learning_rate": 1.9478734256307403e-05,
"loss": 0.0239,
"step": 30500
},
{
"epoch": 0.026358520747736446,
"grad_norm": 26.940954208374023,
"learning_rate": 1.9477871233552946e-05,
"loss": 0.0709,
"step": 30550
},
{
"epoch": 0.026401660716227012,
"grad_norm": 7.362630470575393e-12,
"learning_rate": 1.9477008210798486e-05,
"loss": 0.0117,
"step": 30600
},
{
"epoch": 0.02644480068471758,
"grad_norm": 1.2002854418824427e-05,
"learning_rate": 1.947614518804403e-05,
"loss": 0.0392,
"step": 30650
},
{
"epoch": 0.026487940653208145,
"grad_norm": 0.4743211269378662,
"learning_rate": 1.947528216528957e-05,
"loss": 0.0461,
"step": 30700
},
{
"epoch": 0.026531080621698715,
"grad_norm": 5.520277568393794e-10,
"learning_rate": 1.9474419142535113e-05,
"loss": 0.002,
"step": 30750
},
{
"epoch": 0.026574220590189282,
"grad_norm": 5.655643420254819e-08,
"learning_rate": 1.9473556119780656e-05,
"loss": 0.002,
"step": 30800
},
{
"epoch": 0.02661736055867985,
"grad_norm": 3.0585747481381986e-06,
"learning_rate": 1.94726930970262e-05,
"loss": 0.0208,
"step": 30850
},
{
"epoch": 0.026660500527170415,
"grad_norm": 0.00038789489190094173,
"learning_rate": 1.947183007427174e-05,
"loss": 0.0221,
"step": 30900
},
{
"epoch": 0.02670364049566098,
"grad_norm": 0.006069442722946405,
"learning_rate": 1.9470967051517283e-05,
"loss": 0.0332,
"step": 30950
},
{
"epoch": 0.02674678046415155,
"grad_norm": 2.1992854204455625e-09,
"learning_rate": 1.9470104028762823e-05,
"loss": 0.0133,
"step": 31000
},
{
"epoch": 0.026789920432642115,
"grad_norm": 0.0005120674031786621,
"learning_rate": 1.9469241006008366e-05,
"loss": 0.0549,
"step": 31050
},
{
"epoch": 0.02683306040113268,
"grad_norm": 3.589123298297636e-05,
"learning_rate": 1.946837798325391e-05,
"loss": 0.0172,
"step": 31100
},
{
"epoch": 0.02687620036962325,
"grad_norm": 4.615823812059716e-08,
"learning_rate": 1.946751496049945e-05,
"loss": 0.013,
"step": 31150
},
{
"epoch": 0.026919340338113818,
"grad_norm": 7.231820475794848e-09,
"learning_rate": 1.9466651937744993e-05,
"loss": 0.0037,
"step": 31200
},
{
"epoch": 0.026962480306604385,
"grad_norm": 5.052131157867734e-09,
"learning_rate": 1.9465788914990532e-05,
"loss": 0.0491,
"step": 31250
},
{
"epoch": 0.02700562027509495,
"grad_norm": 0.00010309406206943095,
"learning_rate": 1.9464925892236076e-05,
"loss": 0.0028,
"step": 31300
},
{
"epoch": 0.027048760243585518,
"grad_norm": 5.6031745771178976e-05,
"learning_rate": 1.946406286948162e-05,
"loss": 0.015,
"step": 31350
},
{
"epoch": 0.027091900212076084,
"grad_norm": 0.00024476449470967054,
"learning_rate": 1.9463199846727162e-05,
"loss": 0.0154,
"step": 31400
},
{
"epoch": 0.02713504018056665,
"grad_norm": 2.0063467331965512e-07,
"learning_rate": 1.9462336823972702e-05,
"loss": 0.0212,
"step": 31450
},
{
"epoch": 0.027178180149057218,
"grad_norm": 6.659844075329602e-05,
"learning_rate": 1.9461473801218246e-05,
"loss": 0.0216,
"step": 31500
},
{
"epoch": 0.027221320117547788,
"grad_norm": 4.053091470268555e-05,
"learning_rate": 1.9460610778463786e-05,
"loss": 0.026,
"step": 31550
},
{
"epoch": 0.027264460086038354,
"grad_norm": 2.6744512382492758e-08,
"learning_rate": 1.945974775570933e-05,
"loss": 0.0284,
"step": 31600
},
{
"epoch": 0.02730760005452892,
"grad_norm": 0.1950395703315735,
"learning_rate": 1.945888473295487e-05,
"loss": 0.0064,
"step": 31650
},
{
"epoch": 0.027350740023019487,
"grad_norm": 41.71430587768555,
"learning_rate": 1.9458021710200412e-05,
"loss": 0.0379,
"step": 31700
},
{
"epoch": 0.027393879991510054,
"grad_norm": 2.8257717943347416e-08,
"learning_rate": 1.9457158687445952e-05,
"loss": 0.0263,
"step": 31750
},
{
"epoch": 0.02743701996000062,
"grad_norm": 0.002763712080195546,
"learning_rate": 1.9456295664691496e-05,
"loss": 0.0189,
"step": 31800
},
{
"epoch": 0.027480159928491187,
"grad_norm": 1.0972726061098115e-09,
"learning_rate": 1.945543264193704e-05,
"loss": 0.0156,
"step": 31850
},
{
"epoch": 0.027523299896981754,
"grad_norm": 0.0012834984809160233,
"learning_rate": 1.9454569619182582e-05,
"loss": 0.0076,
"step": 31900
},
{
"epoch": 0.027566439865472324,
"grad_norm": 2.497445628080186e-08,
"learning_rate": 1.9453706596428122e-05,
"loss": 0.0209,
"step": 31950
},
{
"epoch": 0.02760957983396289,
"grad_norm": 23.704517364501953,
"learning_rate": 1.9452843573673666e-05,
"loss": 0.0603,
"step": 32000
},
{
"epoch": 0.027652719802453457,
"grad_norm": 0.0009068456711247563,
"learning_rate": 1.945198055091921e-05,
"loss": 0.035,
"step": 32050
},
{
"epoch": 0.027695859770944024,
"grad_norm": 5.298162460327148,
"learning_rate": 1.945111752816475e-05,
"loss": 0.0053,
"step": 32100
},
{
"epoch": 0.02773899973943459,
"grad_norm": 0.017380917444825172,
"learning_rate": 1.9450254505410292e-05,
"loss": 0.0471,
"step": 32150
},
{
"epoch": 0.027782139707925157,
"grad_norm": 0.02581915073096752,
"learning_rate": 1.9449391482655832e-05,
"loss": 0.0396,
"step": 32200
},
{
"epoch": 0.027825279676415723,
"grad_norm": 1.437704066908907e-09,
"learning_rate": 1.9448528459901375e-05,
"loss": 0.0283,
"step": 32250
},
{
"epoch": 0.02786841964490629,
"grad_norm": 1.0882466483508324e-08,
"learning_rate": 1.9447665437146915e-05,
"loss": 0.0066,
"step": 32300
},
{
"epoch": 0.02791155961339686,
"grad_norm": 5.027173122229556e-11,
"learning_rate": 1.9446802414392462e-05,
"loss": 0.0082,
"step": 32350
},
{
"epoch": 0.027954699581887427,
"grad_norm": 4.071168899536133,
"learning_rate": 1.9445939391638002e-05,
"loss": 0.0217,
"step": 32400
},
{
"epoch": 0.027997839550377993,
"grad_norm": 0.0017136979149654508,
"learning_rate": 1.9445076368883545e-05,
"loss": 0.0665,
"step": 32450
},
{
"epoch": 0.02804097951886856,
"grad_norm": 1.7071112301536573e-09,
"learning_rate": 1.9444213346129085e-05,
"loss": 0.0283,
"step": 32500
},
{
"epoch": 0.028084119487359126,
"grad_norm": 2.8745741897928667e-10,
"learning_rate": 1.944335032337463e-05,
"loss": 0.0165,
"step": 32550
},
{
"epoch": 0.028127259455849693,
"grad_norm": 0.06553611904382706,
"learning_rate": 1.944248730062017e-05,
"loss": 0.0039,
"step": 32600
},
{
"epoch": 0.02817039942434026,
"grad_norm": 1.2114237506466452e-05,
"learning_rate": 1.9441624277865712e-05,
"loss": 0.0053,
"step": 32650
},
{
"epoch": 0.028213539392830826,
"grad_norm": 5.977819910185644e-06,
"learning_rate": 1.9440761255111252e-05,
"loss": 0.016,
"step": 32700
},
{
"epoch": 0.028256679361321393,
"grad_norm": 0.00414885301142931,
"learning_rate": 1.9439898232356795e-05,
"loss": 0.0064,
"step": 32750
},
{
"epoch": 0.028299819329811963,
"grad_norm": 0.001667422242462635,
"learning_rate": 1.943903520960234e-05,
"loss": 0.0013,
"step": 32800
},
{
"epoch": 0.02834295929830253,
"grad_norm": 1.7196412045450415e-06,
"learning_rate": 1.943817218684788e-05,
"loss": 0.0022,
"step": 32850
},
{
"epoch": 0.028386099266793096,
"grad_norm": 4.220390792397666e-07,
"learning_rate": 1.9437309164093422e-05,
"loss": 0.0278,
"step": 32900
},
{
"epoch": 0.028429239235283663,
"grad_norm": 8.6249691833018e-09,
"learning_rate": 1.9436446141338965e-05,
"loss": 0.0155,
"step": 32950
},
{
"epoch": 0.02847237920377423,
"grad_norm": 21.435453414916992,
"learning_rate": 1.9435583118584505e-05,
"loss": 0.0234,
"step": 33000
},
{
"epoch": 0.028515519172264796,
"grad_norm": 9.135671461990569e-06,
"learning_rate": 1.943472009583005e-05,
"loss": 0.0028,
"step": 33050
},
{
"epoch": 0.028558659140755362,
"grad_norm": 1.085790088950489e-07,
"learning_rate": 1.9433857073075592e-05,
"loss": 0.0189,
"step": 33100
},
{
"epoch": 0.02860179910924593,
"grad_norm": 1.0733113288879395,
"learning_rate": 1.9432994050321132e-05,
"loss": 0.0188,
"step": 33150
},
{
"epoch": 0.0286449390777365,
"grad_norm": 5.325038046066766e-07,
"learning_rate": 1.9432131027566675e-05,
"loss": 0.0025,
"step": 33200
},
{
"epoch": 0.028688079046227066,
"grad_norm": 0.001730454503558576,
"learning_rate": 1.9431268004812215e-05,
"loss": 0.0429,
"step": 33250
},
{
"epoch": 0.028731219014717632,
"grad_norm": 0.03524341806769371,
"learning_rate": 1.943040498205776e-05,
"loss": 0.0147,
"step": 33300
},
{
"epoch": 0.0287743589832082,
"grad_norm": 8.027368769703003e-10,
"learning_rate": 1.9429541959303298e-05,
"loss": 0.0074,
"step": 33350
},
{
"epoch": 0.028817498951698765,
"grad_norm": 2.603889299734874e-07,
"learning_rate": 1.9428678936548845e-05,
"loss": 0.0015,
"step": 33400
},
{
"epoch": 0.028860638920189332,
"grad_norm": 12.171298027038574,
"learning_rate": 1.9427815913794385e-05,
"loss": 0.0188,
"step": 33450
},
{
"epoch": 0.0289037788886799,
"grad_norm": 3.4058632536471123e-06,
"learning_rate": 1.9426952891039928e-05,
"loss": 0.0529,
"step": 33500
},
{
"epoch": 0.028946918857170465,
"grad_norm": 17.399200439453125,
"learning_rate": 1.9426089868285468e-05,
"loss": 0.0294,
"step": 33550
},
{
"epoch": 0.028990058825661035,
"grad_norm": 0.011678768321871758,
"learning_rate": 1.942522684553101e-05,
"loss": 0.0211,
"step": 33600
},
{
"epoch": 0.029033198794151602,
"grad_norm": 2.466938212819514e-06,
"learning_rate": 1.942436382277655e-05,
"loss": 0.03,
"step": 33650
},
{
"epoch": 0.02907633876264217,
"grad_norm": 3.6094334986136456e-12,
"learning_rate": 1.9423500800022095e-05,
"loss": 0.0381,
"step": 33700
},
{
"epoch": 0.029119478731132735,
"grad_norm": 0.08116328716278076,
"learning_rate": 1.9422637777267638e-05,
"loss": 0.0016,
"step": 33750
},
{
"epoch": 0.0291626186996233,
"grad_norm": 0.2594936788082123,
"learning_rate": 1.9421774754513178e-05,
"loss": 0.0145,
"step": 33800
},
{
"epoch": 0.029205758668113868,
"grad_norm": 1.6326714103342965e-05,
"learning_rate": 1.942091173175872e-05,
"loss": 0.014,
"step": 33850
},
{
"epoch": 0.029248898636604435,
"grad_norm": 6.704578368044167e-07,
"learning_rate": 1.942004870900426e-05,
"loss": 0.0138,
"step": 33900
},
{
"epoch": 0.029292038605095,
"grad_norm": 1.600632737464025e-09,
"learning_rate": 1.9419185686249805e-05,
"loss": 0.0044,
"step": 33950
},
{
"epoch": 0.02933517857358557,
"grad_norm": 2.9473580070771277e-05,
"learning_rate": 1.9418322663495348e-05,
"loss": 0.0209,
"step": 34000
},
{
"epoch": 0.029378318542076138,
"grad_norm": 0.013792168349027634,
"learning_rate": 1.941745964074089e-05,
"loss": 0.009,
"step": 34050
},
{
"epoch": 0.029421458510566705,
"grad_norm": 1.5911604123175493e-07,
"learning_rate": 1.941659661798643e-05,
"loss": 0.0272,
"step": 34100
},
{
"epoch": 0.02946459847905727,
"grad_norm": 0.29515737295150757,
"learning_rate": 1.9415733595231975e-05,
"loss": 0.0595,
"step": 34150
},
{
"epoch": 0.029507738447547838,
"grad_norm": 2.744394862475019e-07,
"learning_rate": 1.9414870572477515e-05,
"loss": 0.046,
"step": 34200
},
{
"epoch": 0.029550878416038404,
"grad_norm": 0.028887495398521423,
"learning_rate": 1.9414007549723058e-05,
"loss": 0.0014,
"step": 34250
},
{
"epoch": 0.02959401838452897,
"grad_norm": 1.5995985449990258e-05,
"learning_rate": 1.9413144526968598e-05,
"loss": 0.0072,
"step": 34300
},
{
"epoch": 0.029637158353019537,
"grad_norm": 1.774524207576178e-05,
"learning_rate": 1.941228150421414e-05,
"loss": 0.0072,
"step": 34350
},
{
"epoch": 0.029680298321510108,
"grad_norm": 3.840292084333896e-09,
"learning_rate": 1.9411418481459685e-05,
"loss": 0.015,
"step": 34400
},
{
"epoch": 0.029723438290000674,
"grad_norm": 4.855828592553735e-06,
"learning_rate": 1.9410555458705225e-05,
"loss": 0.0101,
"step": 34450
},
{
"epoch": 0.02976657825849124,
"grad_norm": 5.043638229370117,
"learning_rate": 1.9409692435950768e-05,
"loss": 0.0598,
"step": 34500
},
{
"epoch": 0.029809718226981807,
"grad_norm": 3.365451473058556e-09,
"learning_rate": 1.940882941319631e-05,
"loss": 0.012,
"step": 34550
},
{
"epoch": 0.029852858195472374,
"grad_norm": 2.5963392999983625e-06,
"learning_rate": 1.940796639044185e-05,
"loss": 0.0195,
"step": 34600
},
{
"epoch": 0.02989599816396294,
"grad_norm": 0.0003348338359501213,
"learning_rate": 1.9407103367687394e-05,
"loss": 0.0289,
"step": 34650
},
{
"epoch": 0.029939138132453507,
"grad_norm": 6.386066436767578,
"learning_rate": 1.9406240344932938e-05,
"loss": 0.0308,
"step": 34700
},
{
"epoch": 0.029982278100944074,
"grad_norm": 0.00012195282033644617,
"learning_rate": 1.9405377322178478e-05,
"loss": 0.0522,
"step": 34750
},
{
"epoch": 0.030025418069434644,
"grad_norm": 0.0025203858967870474,
"learning_rate": 1.940451429942402e-05,
"loss": 0.0275,
"step": 34800
},
{
"epoch": 0.03006855803792521,
"grad_norm": 4.238718820381848e-10,
"learning_rate": 1.940365127666956e-05,
"loss": 0.0164,
"step": 34850
},
{
"epoch": 0.030111698006415777,
"grad_norm": 1.477847000330712e-08,
"learning_rate": 1.9402788253915104e-05,
"loss": 0.0227,
"step": 34900
},
{
"epoch": 0.030154837974906343,
"grad_norm": 8.416482621953492e-09,
"learning_rate": 1.9401925231160644e-05,
"loss": 0.0379,
"step": 34950
},
{
"epoch": 0.03019797794339691,
"grad_norm": 2.9379866646195296e-06,
"learning_rate": 1.940106220840619e-05,
"loss": 0.0449,
"step": 35000
},
{
"epoch": 0.030241117911887477,
"grad_norm": 13.662910461425781,
"learning_rate": 1.940019918565173e-05,
"loss": 0.0245,
"step": 35050
},
{
"epoch": 0.030284257880378043,
"grad_norm": 2.694193881325191e-06,
"learning_rate": 1.9399336162897274e-05,
"loss": 0.0231,
"step": 35100
},
{
"epoch": 0.03032739784886861,
"grad_norm": 19.55348014831543,
"learning_rate": 1.9398473140142814e-05,
"loss": 0.0253,
"step": 35150
},
{
"epoch": 0.03037053781735918,
"grad_norm": 7.588599970631549e-09,
"learning_rate": 1.9397610117388358e-05,
"loss": 0.026,
"step": 35200
},
{
"epoch": 0.030413677785849746,
"grad_norm": 6.923779882761494e-10,
"learning_rate": 1.9396747094633898e-05,
"loss": 0.008,
"step": 35250
},
{
"epoch": 0.030456817754340313,
"grad_norm": 5.178381456971692e-07,
"learning_rate": 1.939588407187944e-05,
"loss": 0.0512,
"step": 35300
},
{
"epoch": 0.03049995772283088,
"grad_norm": 3.179905760930524e-08,
"learning_rate": 1.939502104912498e-05,
"loss": 0.0314,
"step": 35350
},
{
"epoch": 0.030543097691321446,
"grad_norm": 0.00010464258957654238,
"learning_rate": 1.9394158026370524e-05,
"loss": 0.0015,
"step": 35400
},
{
"epoch": 0.030586237659812013,
"grad_norm": 11.300006866455078,
"learning_rate": 1.9393295003616067e-05,
"loss": 0.0298,
"step": 35450
},
{
"epoch": 0.03062937762830258,
"grad_norm": 1.0112120918392975e-07,
"learning_rate": 1.9392431980861607e-05,
"loss": 0.0235,
"step": 35500
},
{
"epoch": 0.030672517596793146,
"grad_norm": 0.0002930278715211898,
"learning_rate": 1.939156895810715e-05,
"loss": 0.0422,
"step": 35550
},
{
"epoch": 0.030715657565283716,
"grad_norm": 3.265949146680214e-07,
"learning_rate": 1.9390705935352694e-05,
"loss": 0.0453,
"step": 35600
},
{
"epoch": 0.030758797533774283,
"grad_norm": 0.01071107853204012,
"learning_rate": 1.9389842912598234e-05,
"loss": 0.0088,
"step": 35650
},
{
"epoch": 0.03080193750226485,
"grad_norm": 2.198061288538611e-09,
"learning_rate": 1.9388979889843777e-05,
"loss": 0.0344,
"step": 35700
},
{
"epoch": 0.030845077470755416,
"grad_norm": 2.0178050874619657e-07,
"learning_rate": 1.938811686708932e-05,
"loss": 0.0112,
"step": 35750
},
{
"epoch": 0.030888217439245982,
"grad_norm": 0.03751551732420921,
"learning_rate": 1.938725384433486e-05,
"loss": 0.0112,
"step": 35800
},
{
"epoch": 0.03093135740773655,
"grad_norm": 0.00011108023318229243,
"learning_rate": 1.9386390821580404e-05,
"loss": 0.0275,
"step": 35850
},
{
"epoch": 0.030974497376227116,
"grad_norm": 1.5553026644354873e-09,
"learning_rate": 1.9385527798825944e-05,
"loss": 0.0118,
"step": 35900
},
{
"epoch": 0.031017637344717682,
"grad_norm": 2.6839693418878596e-06,
"learning_rate": 1.9384664776071487e-05,
"loss": 0.0054,
"step": 35950
},
{
"epoch": 0.03106077731320825,
"grad_norm": 2.178272318076324e-08,
"learning_rate": 1.9383801753317027e-05,
"loss": 0.0331,
"step": 36000
},
{
"epoch": 0.03110391728169882,
"grad_norm": 2.3207785204704123e-07,
"learning_rate": 1.9382938730562574e-05,
"loss": 0.0102,
"step": 36050
},
{
"epoch": 0.031147057250189385,
"grad_norm": 1.738131345518923e-07,
"learning_rate": 1.9382075707808114e-05,
"loss": 0.0588,
"step": 36100
},
{
"epoch": 0.031190197218679952,
"grad_norm": 0.019147371873259544,
"learning_rate": 1.9381212685053657e-05,
"loss": 0.043,
"step": 36150
},
{
"epoch": 0.03123333718717052,
"grad_norm": 0.0022545859683305025,
"learning_rate": 1.9380349662299197e-05,
"loss": 0.0191,
"step": 36200
},
{
"epoch": 0.031276477155661085,
"grad_norm": 0.00014786762767471373,
"learning_rate": 1.937948663954474e-05,
"loss": 0.0016,
"step": 36250
},
{
"epoch": 0.031319617124151655,
"grad_norm": 1.8323513018003723e-07,
"learning_rate": 1.937862361679028e-05,
"loss": 0.0007,
"step": 36300
},
{
"epoch": 0.03136275709264222,
"grad_norm": 15.16702651977539,
"learning_rate": 1.9377760594035824e-05,
"loss": 0.0363,
"step": 36350
},
{
"epoch": 0.03140589706113279,
"grad_norm": 0.061391185969114304,
"learning_rate": 1.9376897571281367e-05,
"loss": 0.0393,
"step": 36400
},
{
"epoch": 0.03144903702962335,
"grad_norm": 0.0035098083317279816,
"learning_rate": 1.9376034548526907e-05,
"loss": 0.0147,
"step": 36450
},
{
"epoch": 0.03149217699811392,
"grad_norm": 0.06623140722513199,
"learning_rate": 1.937517152577245e-05,
"loss": 0.0543,
"step": 36500
},
{
"epoch": 0.031535316966604485,
"grad_norm": 8.011748832359444e-06,
"learning_rate": 1.937430850301799e-05,
"loss": 0.0447,
"step": 36550
},
{
"epoch": 0.031578456935095055,
"grad_norm": 2.976227278850274e-06,
"learning_rate": 1.9373445480263534e-05,
"loss": 0.0238,
"step": 36600
},
{
"epoch": 0.031621596903585625,
"grad_norm": 4.54370677971383e-07,
"learning_rate": 1.9372582457509077e-05,
"loss": 0.0282,
"step": 36650
},
{
"epoch": 0.03166473687207619,
"grad_norm": 1.2593355247503268e-09,
"learning_rate": 1.937171943475462e-05,
"loss": 0.0475,
"step": 36700
},
{
"epoch": 0.03170787684056676,
"grad_norm": 0.0001775699929567054,
"learning_rate": 1.937085641200016e-05,
"loss": 0.0005,
"step": 36750
},
{
"epoch": 0.03175101680905732,
"grad_norm": 1.9041050336454646e-07,
"learning_rate": 1.9369993389245704e-05,
"loss": 0.0008,
"step": 36800
},
{
"epoch": 0.03179415677754789,
"grad_norm": 0.0002166083868360147,
"learning_rate": 1.9369130366491244e-05,
"loss": 0.0064,
"step": 36850
},
{
"epoch": 0.031837296746038454,
"grad_norm": 2.4730157921482032e-09,
"learning_rate": 1.9368267343736787e-05,
"loss": 0.0747,
"step": 36900
},
{
"epoch": 0.031880436714529024,
"grad_norm": 6.864386705274228e-06,
"learning_rate": 1.9367404320982327e-05,
"loss": 0.0022,
"step": 36950
},
{
"epoch": 0.03192357668301959,
"grad_norm": 2.638907517393818e-06,
"learning_rate": 1.936654129822787e-05,
"loss": 0.0239,
"step": 37000
},
{
"epoch": 0.03196671665151016,
"grad_norm": 8.631070522824302e-05,
"learning_rate": 1.9365678275473413e-05,
"loss": 0.0191,
"step": 37050
},
{
"epoch": 0.03200985662000073,
"grad_norm": 14.52698802947998,
"learning_rate": 1.9364815252718953e-05,
"loss": 0.0188,
"step": 37100
},
{
"epoch": 0.03205299658849129,
"grad_norm": 0.07407932728528976,
"learning_rate": 1.9363952229964497e-05,
"loss": 0.0136,
"step": 37150
},
{
"epoch": 0.03209613655698186,
"grad_norm": 0.002848062664270401,
"learning_rate": 1.936308920721004e-05,
"loss": 0.0451,
"step": 37200
},
{
"epoch": 0.032139276525472424,
"grad_norm": 2.2414766931433405e-07,
"learning_rate": 1.936222618445558e-05,
"loss": 0.0395,
"step": 37250
},
{
"epoch": 0.032182416493962994,
"grad_norm": 5.524349830920983e-07,
"learning_rate": 1.9361363161701123e-05,
"loss": 0.0468,
"step": 37300
},
{
"epoch": 0.03222555646245356,
"grad_norm": 2.2004120182828046e-05,
"learning_rate": 1.9360500138946667e-05,
"loss": 0.0599,
"step": 37350
},
{
"epoch": 0.03226869643094413,
"grad_norm": 5.064206831661977e-08,
"learning_rate": 1.9359637116192207e-05,
"loss": 0.0191,
"step": 37400
},
{
"epoch": 0.0323118363994347,
"grad_norm": 5.038096060161479e-05,
"learning_rate": 1.935877409343775e-05,
"loss": 0.0094,
"step": 37450
},
{
"epoch": 0.03235497636792526,
"grad_norm": 0.002139901742339134,
"learning_rate": 1.935791107068329e-05,
"loss": 0.0026,
"step": 37500
},
{
"epoch": 0.03239811633641583,
"grad_norm": 0.025793571025133133,
"learning_rate": 1.9357048047928833e-05,
"loss": 0.0503,
"step": 37550
},
{
"epoch": 0.03244125630490639,
"grad_norm": 1.497374176979065,
"learning_rate": 1.9356185025174373e-05,
"loss": 0.0239,
"step": 37600
},
{
"epoch": 0.032484396273396964,
"grad_norm": 9.68094241216022e-07,
"learning_rate": 1.935532200241992e-05,
"loss": 0.0362,
"step": 37650
},
{
"epoch": 0.03252753624188753,
"grad_norm": 7.437192266479542e-07,
"learning_rate": 1.935445897966546e-05,
"loss": 0.0174,
"step": 37700
},
{
"epoch": 0.0325706762103781,
"grad_norm": 1.591896947594762e-09,
"learning_rate": 1.9353595956911003e-05,
"loss": 0.0253,
"step": 37750
},
{
"epoch": 0.03261381617886866,
"grad_norm": 14.039113998413086,
"learning_rate": 1.9352732934156543e-05,
"loss": 0.0201,
"step": 37800
},
{
"epoch": 0.03265695614735923,
"grad_norm": 2.0073053747182712e-05,
"learning_rate": 1.9351869911402087e-05,
"loss": 0.043,
"step": 37850
},
{
"epoch": 0.0327000961158498,
"grad_norm": 1.3844499768822516e-08,
"learning_rate": 1.9351006888647626e-05,
"loss": 0.007,
"step": 37900
},
{
"epoch": 0.03274323608434036,
"grad_norm": 0.02289557084441185,
"learning_rate": 1.935014386589317e-05,
"loss": 0.0268,
"step": 37950
},
{
"epoch": 0.03278637605283093,
"grad_norm": 2.7390053766729316e-11,
"learning_rate": 1.934928084313871e-05,
"loss": 0.0141,
"step": 38000
},
{
"epoch": 0.032829516021321496,
"grad_norm": 2.0595265937117802e-07,
"learning_rate": 1.9348417820384253e-05,
"loss": 0.1202,
"step": 38050
},
{
"epoch": 0.032872655989812066,
"grad_norm": 0.00014018621004652232,
"learning_rate": 1.9347554797629796e-05,
"loss": 0.0277,
"step": 38100
},
{
"epoch": 0.03291579595830263,
"grad_norm": 9.558748570270836e-05,
"learning_rate": 1.9346691774875336e-05,
"loss": 0.0498,
"step": 38150
},
{
"epoch": 0.0329589359267932,
"grad_norm": 2.20267253325801e-07,
"learning_rate": 1.934582875212088e-05,
"loss": 0.0357,
"step": 38200
},
{
"epoch": 0.03300207589528377,
"grad_norm": 0.002117832424119115,
"learning_rate": 1.9344965729366423e-05,
"loss": 0.0478,
"step": 38250
},
{
"epoch": 0.03304521586377433,
"grad_norm": 0.0015125697245821357,
"learning_rate": 1.9344102706611963e-05,
"loss": 0.0049,
"step": 38300
},
{
"epoch": 0.0330883558322649,
"grad_norm": 0.001929111429490149,
"learning_rate": 1.9343239683857506e-05,
"loss": 0.0321,
"step": 38350
},
{
"epoch": 0.033131495800755466,
"grad_norm": 14.052818298339844,
"learning_rate": 1.934237666110305e-05,
"loss": 0.0126,
"step": 38400
},
{
"epoch": 0.033174635769246036,
"grad_norm": 0.04780351743102074,
"learning_rate": 1.934151363834859e-05,
"loss": 0.0145,
"step": 38450
},
{
"epoch": 0.0332177757377366,
"grad_norm": 1.625859908926941e-07,
"learning_rate": 1.9340650615594133e-05,
"loss": 0.0006,
"step": 38500
},
{
"epoch": 0.03326091570622717,
"grad_norm": 4.171390173723921e-06,
"learning_rate": 1.9339787592839673e-05,
"loss": 0.0052,
"step": 38550
},
{
"epoch": 0.03330405567471773,
"grad_norm": 9.933991532307118e-05,
"learning_rate": 1.9338924570085216e-05,
"loss": 0.0149,
"step": 38600
},
{
"epoch": 0.0333471956432083,
"grad_norm": 5.527433510899016e-10,
"learning_rate": 1.9338061547330756e-05,
"loss": 0.0569,
"step": 38650
},
{
"epoch": 0.03339033561169887,
"grad_norm": 1.7711924149566016e-09,
"learning_rate": 1.9337198524576303e-05,
"loss": 0.0089,
"step": 38700
},
{
"epoch": 0.033433475580189435,
"grad_norm": 5.876652497960322e-09,
"learning_rate": 1.9336335501821843e-05,
"loss": 0.0412,
"step": 38750
},
{
"epoch": 0.033476615548680005,
"grad_norm": 1.2611899375915527,
"learning_rate": 1.9335472479067386e-05,
"loss": 0.0057,
"step": 38800
},
{
"epoch": 0.03351975551717057,
"grad_norm": 0.00011541438288986683,
"learning_rate": 1.9334609456312926e-05,
"loss": 0.0264,
"step": 38850
},
{
"epoch": 0.03356289548566114,
"grad_norm": 0.7902683019638062,
"learning_rate": 1.933374643355847e-05,
"loss": 0.0269,
"step": 38900
},
{
"epoch": 0.0336060354541517,
"grad_norm": 1.6534098904230632e-05,
"learning_rate": 1.933288341080401e-05,
"loss": 0.0041,
"step": 38950
},
{
"epoch": 0.03364917542264227,
"grad_norm": 0.029098449274897575,
"learning_rate": 1.9332020388049553e-05,
"loss": 0.0208,
"step": 39000
},
{
"epoch": 0.033692315391132835,
"grad_norm": 0.0004794780688825995,
"learning_rate": 1.9331157365295096e-05,
"loss": 0.0595,
"step": 39050
},
{
"epoch": 0.033735455359623405,
"grad_norm": 16.320070266723633,
"learning_rate": 1.9330294342540636e-05,
"loss": 0.0735,
"step": 39100
},
{
"epoch": 0.033778595328113975,
"grad_norm": 3.635158840609165e-09,
"learning_rate": 1.932943131978618e-05,
"loss": 0.0164,
"step": 39150
},
{
"epoch": 0.03382173529660454,
"grad_norm": 1.406357796440716e-06,
"learning_rate": 1.932856829703172e-05,
"loss": 0.0237,
"step": 39200
},
{
"epoch": 0.03386487526509511,
"grad_norm": 0.05031180754303932,
"learning_rate": 1.9327705274277263e-05,
"loss": 0.0264,
"step": 39250
},
{
"epoch": 0.03390801523358567,
"grad_norm": 0.022205352783203125,
"learning_rate": 1.9326842251522806e-05,
"loss": 0.0076,
"step": 39300
},
{
"epoch": 0.03395115520207624,
"grad_norm": 3.1427214707946405e-05,
"learning_rate": 1.932597922876835e-05,
"loss": 0.0093,
"step": 39350
},
{
"epoch": 0.033994295170566804,
"grad_norm": 0.0015017461264505982,
"learning_rate": 1.932511620601389e-05,
"loss": 0.0016,
"step": 39400
},
{
"epoch": 0.034037435139057375,
"grad_norm": 3.3295341483885466e-10,
"learning_rate": 1.9324253183259433e-05,
"loss": 0.0297,
"step": 39450
},
{
"epoch": 0.034080575107547945,
"grad_norm": 1.431539747853705e-10,
"learning_rate": 1.9323390160504972e-05,
"loss": 0.008,
"step": 39500
},
{
"epoch": 0.03412371507603851,
"grad_norm": 9.472168188695562e-11,
"learning_rate": 1.9322527137750516e-05,
"loss": 0.0526,
"step": 39550
},
{
"epoch": 0.03416685504452908,
"grad_norm": 1.1010347078510563e-09,
"learning_rate": 1.9321664114996056e-05,
"loss": 0.0438,
"step": 39600
},
{
"epoch": 0.03420999501301964,
"grad_norm": 0.0038324242923408747,
"learning_rate": 1.93208010922416e-05,
"loss": 0.0068,
"step": 39650
},
{
"epoch": 0.03425313498151021,
"grad_norm": 1.2454121067762003e-10,
"learning_rate": 1.9319938069487142e-05,
"loss": 0.0105,
"step": 39700
},
{
"epoch": 0.034296274950000774,
"grad_norm": 3.0910987103283105e-09,
"learning_rate": 1.9319075046732682e-05,
"loss": 0.003,
"step": 39750
},
{
"epoch": 0.034339414918491344,
"grad_norm": 0.20711366832256317,
"learning_rate": 1.9318212023978226e-05,
"loss": 0.0072,
"step": 39800
},
{
"epoch": 0.03438255488698191,
"grad_norm": 0.0013983896933495998,
"learning_rate": 1.931734900122377e-05,
"loss": 0.0357,
"step": 39850
},
{
"epoch": 0.03442569485547248,
"grad_norm": 1.195646859741828e-06,
"learning_rate": 1.931648597846931e-05,
"loss": 0.0252,
"step": 39900
},
{
"epoch": 0.03446883482396305,
"grad_norm": 0.0007419702014885843,
"learning_rate": 1.9315622955714852e-05,
"loss": 0.0203,
"step": 39950
},
{
"epoch": 0.03451197479245361,
"grad_norm": 1.9399341908865608e-05,
"learning_rate": 1.9314759932960396e-05,
"loss": 0.0273,
"step": 40000
}
],
"logging_steps": 50,
"max_steps": 1159018,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 2500,
"total_flos": 0.0,
"train_batch_size": 8,
"trial_name": null,
"trial_params": null
}