{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9998997292690264,
"eval_steps": 500,
"global_step": 831,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0012032487716835455,
"grad_norm": 1.0745393392299216,
"learning_rate": 1.1904761904761906e-07,
"loss": 0.7396,
"step": 1
},
{
"epoch": 0.006016243858417728,
"grad_norm": 1.0096667298005468,
"learning_rate": 5.952380952380953e-07,
"loss": 0.7345,
"step": 5
},
{
"epoch": 0.012032487716835455,
"grad_norm": 0.5432840285599178,
"learning_rate": 1.1904761904761906e-06,
"loss": 0.6995,
"step": 10
},
{
"epoch": 0.018048731575253184,
"grad_norm": 0.5379885594965839,
"learning_rate": 1.7857142857142859e-06,
"loss": 0.5364,
"step": 15
},
{
"epoch": 0.02406497543367091,
"grad_norm": 0.28279328593216746,
"learning_rate": 2.380952380952381e-06,
"loss": 0.3438,
"step": 20
},
{
"epoch": 0.03008121929208864,
"grad_norm": 0.11670701576343243,
"learning_rate": 2.9761904761904763e-06,
"loss": 0.2194,
"step": 25
},
{
"epoch": 0.03609746315050637,
"grad_norm": 0.09598554828698612,
"learning_rate": 3.5714285714285718e-06,
"loss": 0.1655,
"step": 30
},
{
"epoch": 0.0421137070089241,
"grad_norm": 0.07370159012965125,
"learning_rate": 4.166666666666667e-06,
"loss": 0.1433,
"step": 35
},
{
"epoch": 0.04812995086734182,
"grad_norm": 0.05410564768989329,
"learning_rate": 4.761904761904762e-06,
"loss": 0.1227,
"step": 40
},
{
"epoch": 0.05414619472575955,
"grad_norm": 0.049586846542590546,
"learning_rate": 5.357142857142857e-06,
"loss": 0.1097,
"step": 45
},
{
"epoch": 0.06016243858417728,
"grad_norm": 0.048103082558905504,
"learning_rate": 5.9523809523809525e-06,
"loss": 0.0978,
"step": 50
},
{
"epoch": 0.066178682442595,
"grad_norm": 0.05669271340624084,
"learning_rate": 6.547619047619048e-06,
"loss": 0.0817,
"step": 55
},
{
"epoch": 0.07219492630101274,
"grad_norm": 0.054915884901381114,
"learning_rate": 7.1428571428571436e-06,
"loss": 0.0801,
"step": 60
},
{
"epoch": 0.07821117015943047,
"grad_norm": 0.06021281263850561,
"learning_rate": 7.738095238095238e-06,
"loss": 0.0702,
"step": 65
},
{
"epoch": 0.0842274140178482,
"grad_norm": 0.039388545955733205,
"learning_rate": 8.333333333333334e-06,
"loss": 0.0619,
"step": 70
},
{
"epoch": 0.09024365787626591,
"grad_norm": 0.044736289577753,
"learning_rate": 8.92857142857143e-06,
"loss": 0.0556,
"step": 75
},
{
"epoch": 0.09625990173468364,
"grad_norm": 0.04237369775091706,
"learning_rate": 9.523809523809525e-06,
"loss": 0.0523,
"step": 80
},
{
"epoch": 0.10227614559310137,
"grad_norm": 0.04699046038689833,
"learning_rate": 9.999955782120656e-06,
"loss": 0.0497,
"step": 85
},
{
"epoch": 0.1082923894515191,
"grad_norm": 0.03964650441405405,
"learning_rate": 9.99840823846134e-06,
"loss": 0.0458,
"step": 90
},
{
"epoch": 0.11430863330993683,
"grad_norm": 0.03621563631742996,
"learning_rate": 9.994650582860978e-06,
"loss": 0.0477,
"step": 95
},
{
"epoch": 0.12032487716835456,
"grad_norm": 0.0268237826645417,
"learning_rate": 9.98868447681642e-06,
"loss": 0.0439,
"step": 100
},
{
"epoch": 0.12634112102677228,
"grad_norm": 0.028555915124659773,
"learning_rate": 9.980512558319915e-06,
"loss": 0.0409,
"step": 105
},
{
"epoch": 0.13235736488519,
"grad_norm": 0.026049496996790083,
"learning_rate": 9.970138440692706e-06,
"loss": 0.0414,
"step": 110
},
{
"epoch": 0.13837360874360774,
"grad_norm": 0.03147552348817025,
"learning_rate": 9.957566710987338e-06,
"loss": 0.0406,
"step": 115
},
{
"epoch": 0.14438985260202547,
"grad_norm": 0.027902049215008537,
"learning_rate": 9.942802927959444e-06,
"loss": 0.0392,
"step": 120
},
{
"epoch": 0.1504060964604432,
"grad_norm": 0.028174099496676555,
"learning_rate": 9.925853619609858e-06,
"loss": 0.0339,
"step": 125
},
{
"epoch": 0.15642234031886093,
"grad_norm": 0.025957912802832783,
"learning_rate": 9.906726280298185e-06,
"loss": 0.0365,
"step": 130
},
{
"epoch": 0.16243858417727866,
"grad_norm": 0.026511579347961597,
"learning_rate": 9.885429367429062e-06,
"loss": 0.0365,
"step": 135
},
{
"epoch": 0.1684548280356964,
"grad_norm": 0.02801670507212045,
"learning_rate": 9.861972297712606e-06,
"loss": 0.0343,
"step": 140
},
{
"epoch": 0.17447107189411412,
"grad_norm": 0.023525196780612226,
"learning_rate": 9.836365443000697e-06,
"loss": 0.0331,
"step": 145
},
{
"epoch": 0.18048731575253182,
"grad_norm": 0.02456235083949215,
"learning_rate": 9.808620125700925e-06,
"loss": 0.0335,
"step": 150
},
{
"epoch": 0.18650355961094955,
"grad_norm": 0.021953324848861307,
"learning_rate": 9.778748613770234e-06,
"loss": 0.0313,
"step": 155
},
{
"epoch": 0.19251980346936728,
"grad_norm": 0.02914752771577575,
"learning_rate": 9.746764115290496e-06,
"loss": 0.0354,
"step": 160
},
{
"epoch": 0.19853604732778501,
"grad_norm": 0.0241041372740356,
"learning_rate": 9.712680772628365e-06,
"loss": 0.0338,
"step": 165
},
{
"epoch": 0.20455229118620274,
"grad_norm": 0.02385726337646924,
"learning_rate": 9.676513656182059e-06,
"loss": 0.0343,
"step": 170
},
{
"epoch": 0.21056853504462048,
"grad_norm": 0.021532109435525824,
"learning_rate": 9.63827875771778e-06,
"loss": 0.0317,
"step": 175
},
{
"epoch": 0.2165847789030382,
"grad_norm": 0.02462556259506109,
"learning_rate": 9.597992983298748e-06,
"loss": 0.0299,
"step": 180
},
{
"epoch": 0.22260102276145594,
"grad_norm": 0.022316517368094545,
"learning_rate": 9.55567414580995e-06,
"loss": 0.0323,
"step": 185
},
{
"epoch": 0.22861726661987367,
"grad_norm": 0.02288293026305173,
"learning_rate": 9.511340957081957e-06,
"loss": 0.0307,
"step": 190
},
{
"epoch": 0.2346335104782914,
"grad_norm": 0.022260065504874377,
"learning_rate": 9.46501301961723e-06,
"loss": 0.0345,
"step": 195
},
{
"epoch": 0.24064975433670913,
"grad_norm": 0.021736470284370146,
"learning_rate": 9.416710817922615e-06,
"loss": 0.0311,
"step": 200
},
{
"epoch": 0.24666599819512683,
"grad_norm": 0.02550413340783063,
"learning_rate": 9.366455709451857e-06,
"loss": 0.0329,
"step": 205
},
{
"epoch": 0.25268224205354456,
"grad_norm": 0.02201788645460498,
"learning_rate": 9.314269915162115e-06,
"loss": 0.0309,
"step": 210
},
{
"epoch": 0.2586984859119623,
"grad_norm": 0.02936137915292986,
"learning_rate": 9.260176509688673e-06,
"loss": 0.0285,
"step": 215
},
{
"epoch": 0.26471472977038,
"grad_norm": 0.023538798060004095,
"learning_rate": 9.204199411142196e-06,
"loss": 0.0293,
"step": 220
},
{
"epoch": 0.2707309736287978,
"grad_norm": 0.018492280335574894,
"learning_rate": 9.146363370533004e-06,
"loss": 0.0281,
"step": 225
},
{
"epoch": 0.2767472174872155,
"grad_norm": 0.023852820186103463,
"learning_rate": 9.086693960827106e-06,
"loss": 0.028,
"step": 230
},
{
"epoch": 0.2827634613456332,
"grad_norm": 0.021207055501016953,
"learning_rate": 9.025217565638766e-06,
"loss": 0.0291,
"step": 235
},
{
"epoch": 0.28877970520405094,
"grad_norm": 0.01877956343623577,
"learning_rate": 8.961961367564652e-06,
"loss": 0.0282,
"step": 240
},
{
"epoch": 0.29479594906246864,
"grad_norm": 0.019669983451369902,
"learning_rate": 8.89695333616467e-06,
"loss": 0.0259,
"step": 245
},
{
"epoch": 0.3008121929208864,
"grad_norm": 0.026680075575420455,
"learning_rate": 8.83022221559489e-06,
"loss": 0.0289,
"step": 250
},
{
"epoch": 0.3068284367793041,
"grad_norm": 0.01856152788906122,
"learning_rate": 8.761797511897907e-06,
"loss": 0.0241,
"step": 255
},
{
"epoch": 0.31284468063772186,
"grad_norm": 0.01860271376372244,
"learning_rate": 8.691709479956373e-06,
"loss": 0.0272,
"step": 260
},
{
"epoch": 0.31886092449613956,
"grad_norm": 0.019865593703402643,
"learning_rate": 8.619989110115398e-06,
"loss": 0.0257,
"step": 265
},
{
"epoch": 0.3248771683545573,
"grad_norm": 0.02048991665947615,
"learning_rate": 8.546668114479769e-06,
"loss": 0.029,
"step": 270
},
{
"epoch": 0.330893412212975,
"grad_norm": 0.017217185351734676,
"learning_rate": 8.471778912892008e-06,
"loss": 0.0252,
"step": 275
},
{
"epoch": 0.3369096560713928,
"grad_norm": 0.01813603892384963,
"learning_rate": 8.395354618597533e-06,
"loss": 0.0268,
"step": 280
},
{
"epoch": 0.3429258999298105,
"grad_norm": 0.0238318384115611,
"learning_rate": 8.31742902360319e-06,
"loss": 0.0282,
"step": 285
},
{
"epoch": 0.34894214378822824,
"grad_norm": 0.019064179482916437,
"learning_rate": 8.238036583735673e-06,
"loss": 0.0271,
"step": 290
},
{
"epoch": 0.35495838764664595,
"grad_norm": 0.0222590787343455,
"learning_rate": 8.157212403406424e-06,
"loss": 0.0257,
"step": 295
},
{
"epoch": 0.36097463150506365,
"grad_norm": 0.01848676924848829,
"learning_rate": 8.07499222008977e-06,
"loss": 0.0251,
"step": 300
},
{
"epoch": 0.3669908753634814,
"grad_norm": 0.01993669886796683,
"learning_rate": 7.991412388521108e-06,
"loss": 0.0261,
"step": 305
},
{
"epoch": 0.3730071192218991,
"grad_norm": 0.019508670102946216,
"learning_rate": 7.906509864622202e-06,
"loss": 0.0258,
"step": 310
},
{
"epoch": 0.37902336308031687,
"grad_norm": 0.02181687850379784,
"learning_rate": 7.820322189160618e-06,
"loss": 0.0219,
"step": 315
},
{
"epoch": 0.38503960693873457,
"grad_norm": 0.020435112312527343,
"learning_rate": 7.732887471150589e-06,
"loss": 0.0258,
"step": 320
},
{
"epoch": 0.3910558507971523,
"grad_norm": 0.016792623507338397,
"learning_rate": 7.644244371002619e-06,
"loss": 0.0259,
"step": 325
},
{
"epoch": 0.39707209465557003,
"grad_norm": 0.025243263772643947,
"learning_rate": 7.554432083429253e-06,
"loss": 0.0239,
"step": 330
},
{
"epoch": 0.4030883385139878,
"grad_norm": 0.017995141016588313,
"learning_rate": 7.463490320114646e-06,
"loss": 0.023,
"step": 335
},
{
"epoch": 0.4091045823724055,
"grad_norm": 0.016947239802050783,
"learning_rate": 7.371459292155501e-06,
"loss": 0.0227,
"step": 340
},
{
"epoch": 0.41512082623082325,
"grad_norm": 0.01945334381344522,
"learning_rate": 7.278379692281209e-06,
"loss": 0.0236,
"step": 345
},
{
"epoch": 0.42113707008924095,
"grad_norm": 0.018112579661825487,
"learning_rate": 7.184292676861024e-06,
"loss": 0.0262,
"step": 350
},
{
"epoch": 0.42715331394765865,
"grad_norm": 0.01698932526214814,
"learning_rate": 7.0892398477062375e-06,
"loss": 0.024,
"step": 355
},
{
"epoch": 0.4331695578060764,
"grad_norm": 0.018750963386521588,
"learning_rate": 6.99326323367538e-06,
"loss": 0.0245,
"step": 360
},
{
"epoch": 0.4391858016644941,
"grad_norm": 0.02081900306974978,
"learning_rate": 6.8964052720906175e-06,
"loss": 0.026,
"step": 365
},
{
"epoch": 0.44520204552291187,
"grad_norm": 0.02082309821957481,
"learning_rate": 6.798708789973527e-06,
"loss": 0.0255,
"step": 370
},
{
"epoch": 0.4512182893813296,
"grad_norm": 0.023227812045822693,
"learning_rate": 6.700216985108568e-06,
"loss": 0.0243,
"step": 375
},
{
"epoch": 0.45723453323974733,
"grad_norm": 0.01688396438835524,
"learning_rate": 6.600973406942617e-06,
"loss": 0.0235,
"step": 380
},
{
"epoch": 0.46325077709816503,
"grad_norm": 0.020269716613110212,
"learning_rate": 6.501021937328992e-06,
"loss": 0.0215,
"step": 385
},
{
"epoch": 0.4692670209565828,
"grad_norm": 0.018025367141900818,
"learning_rate": 6.4004067711245366e-06,
"loss": 0.0221,
"step": 390
},
{
"epoch": 0.4752832648150005,
"grad_norm": 0.020858778838367335,
"learning_rate": 6.29917239664826e-06,
"loss": 0.0232,
"step": 395
},
{
"epoch": 0.48129950867341825,
"grad_norm": 0.022426616797026122,
"learning_rate": 6.1973635760102645e-06,
"loss": 0.0232,
"step": 400
},
{
"epoch": 0.48731575253183596,
"grad_norm": 0.02208916714792084,
"learning_rate": 6.0950253253195656e-06,
"loss": 0.0277,
"step": 405
},
{
"epoch": 0.49333199639025366,
"grad_norm": 0.020289821733336374,
"learning_rate": 5.9922028947796495e-06,
"loss": 0.0226,
"step": 410
},
{
"epoch": 0.4993482402486714,
"grad_norm": 0.022132901095444316,
"learning_rate": 5.888941748680484e-06,
"loss": 0.023,
"step": 415
},
{
"epoch": 0.5053644841070891,
"grad_norm": 0.02080443746491033,
"learning_rate": 5.785287545295895e-06,
"loss": 0.0214,
"step": 420
},
{
"epoch": 0.5113807279655068,
"grad_norm": 0.018676657765971747,
"learning_rate": 5.681286116695155e-06,
"loss": 0.0225,
"step": 425
},
{
"epoch": 0.5173969718239246,
"grad_norm": 0.020395399948370366,
"learning_rate": 5.5769834484777344e-06,
"loss": 0.0222,
"step": 430
},
{
"epoch": 0.5234132156823423,
"grad_norm": 0.01859239650955675,
"learning_rate": 5.472425659440157e-06,
"loss": 0.0205,
"step": 435
},
{
"epoch": 0.52942945954076,
"grad_norm": 0.01868449550665156,
"learning_rate": 5.367658981183979e-06,
"loss": 0.0232,
"step": 440
},
{
"epoch": 0.5354457033991777,
"grad_norm": 0.015161045404289166,
"learning_rate": 5.2627297376738674e-06,
"loss": 0.0203,
"step": 445
},
{
"epoch": 0.5414619472575956,
"grad_norm": 0.016845793697294737,
"learning_rate": 5.157684324754858e-06,
"loss": 0.0211,
"step": 450
},
{
"epoch": 0.5474781911160133,
"grad_norm": 0.0194074719508428,
"learning_rate": 5.052569189637813e-06,
"loss": 0.0238,
"step": 455
},
{
"epoch": 0.553494434974431,
"grad_norm": 0.01936893905137615,
"learning_rate": 4.947430810362188e-06,
"loss": 0.0216,
"step": 460
},
{
"epoch": 0.5595106788328487,
"grad_norm": 0.01859222507454881,
"learning_rate": 4.842315675245144e-06,
"loss": 0.0195,
"step": 465
},
{
"epoch": 0.5655269226912664,
"grad_norm": 0.022318020720072922,
"learning_rate": 4.737270262326134e-06,
"loss": 0.0214,
"step": 470
},
{
"epoch": 0.5715431665496842,
"grad_norm": 0.01657420129142331,
"learning_rate": 4.632341018816023e-06,
"loss": 0.0213,
"step": 475
},
{
"epoch": 0.5775594104081019,
"grad_norm": 0.02035658875684946,
"learning_rate": 4.527574340559844e-06,
"loss": 0.0226,
"step": 480
},
{
"epoch": 0.5835756542665196,
"grad_norm": 0.016236086655299523,
"learning_rate": 4.423016551522268e-06,
"loss": 0.0203,
"step": 485
},
{
"epoch": 0.5895918981249373,
"grad_norm": 0.017728340188220976,
"learning_rate": 4.318713883304846e-06,
"loss": 0.0227,
"step": 490
},
{
"epoch": 0.5956081419833551,
"grad_norm": 0.019076149038680715,
"learning_rate": 4.214712454704107e-06,
"loss": 0.0212,
"step": 495
},
{
"epoch": 0.6016243858417728,
"grad_norm": 0.015355569764402307,
"learning_rate": 4.111058251319517e-06,
"loss": 0.0201,
"step": 500
},
{
"epoch": 0.6076406297001905,
"grad_norm": 0.018415895096337096,
"learning_rate": 4.007797105220352e-06,
"loss": 0.0191,
"step": 505
},
{
"epoch": 0.6136568735586082,
"grad_norm": 0.017521505257648343,
"learning_rate": 3.904974674680436e-06,
"loss": 0.0206,
"step": 510
},
{
"epoch": 0.619673117417026,
"grad_norm": 0.017605011135453034,
"learning_rate": 3.802636423989738e-06,
"loss": 0.0212,
"step": 515
},
{
"epoch": 0.6256893612754437,
"grad_norm": 0.01785825295985011,
"learning_rate": 3.70082760335174e-06,
"loss": 0.019,
"step": 520
},
{
"epoch": 0.6317056051338614,
"grad_norm": 0.01652138449465044,
"learning_rate": 3.5995932288754655e-06,
"loss": 0.0181,
"step": 525
},
{
"epoch": 0.6377218489922791,
"grad_norm": 0.018648556241997245,
"learning_rate": 3.4989780626710103e-06,
"loss": 0.0205,
"step": 530
},
{
"epoch": 0.6437380928506968,
"grad_norm": 0.01827077862731449,
"learning_rate": 3.3990265930573863e-06,
"loss": 0.0169,
"step": 535
},
{
"epoch": 0.6497543367091146,
"grad_norm": 0.015591039180393907,
"learning_rate": 3.2997830148914316e-06,
"loss": 0.0206,
"step": 540
},
{
"epoch": 0.6557705805675323,
"grad_norm": 0.019792404271656995,
"learning_rate": 3.2012912100264743e-06,
"loss": 0.0202,
"step": 545
},
{
"epoch": 0.66178682442595,
"grad_norm": 0.018533196717200034,
"learning_rate": 3.1035947279093846e-06,
"loss": 0.021,
"step": 550
},
{
"epoch": 0.6678030682843678,
"grad_norm": 0.019636694754151293,
"learning_rate": 3.006736766324623e-06,
"loss": 0.0221,
"step": 555
},
{
"epoch": 0.6738193121427856,
"grad_norm": 0.014256034059804634,
"learning_rate": 2.9107601522937638e-06,
"loss": 0.0202,
"step": 560
},
{
"epoch": 0.6798355560012033,
"grad_norm": 0.016107622432284356,
"learning_rate": 2.8157073231389752e-06,
"loss": 0.0187,
"step": 565
},
{
"epoch": 0.685851799859621,
"grad_norm": 0.019189045539841333,
"learning_rate": 2.721620307718793e-06,
"loss": 0.0187,
"step": 570
},
{
"epoch": 0.6918680437180387,
"grad_norm": 0.014474735470360907,
"learning_rate": 2.6285407078445015e-06,
"loss": 0.021,
"step": 575
},
{
"epoch": 0.6978842875764565,
"grad_norm": 0.019262771622277575,
"learning_rate": 2.536509679885355e-06,
"loss": 0.0209,
"step": 580
},
{
"epoch": 0.7039005314348742,
"grad_norm": 0.01673939208310001,
"learning_rate": 2.4455679165707473e-06,
"loss": 0.0205,
"step": 585
},
{
"epoch": 0.7099167752932919,
"grad_norm": 0.014946535263918313,
"learning_rate": 2.3557556289973838e-06,
"loss": 0.0205,
"step": 590
},
{
"epoch": 0.7159330191517096,
"grad_norm": 0.01596788205464162,
"learning_rate": 2.2671125288494123e-06,
"loss": 0.0189,
"step": 595
},
{
"epoch": 0.7219492630101273,
"grad_norm": 0.016621734500822712,
"learning_rate": 2.1796778108393824e-06,
"loss": 0.0198,
"step": 600
},
{
"epoch": 0.7279655068685451,
"grad_norm": 0.018148193058810227,
"learning_rate": 2.0934901353777994e-06,
"loss": 0.0203,
"step": 605
},
{
"epoch": 0.7339817507269628,
"grad_norm": 0.016498667065988057,
"learning_rate": 2.008587611478894e-06,
"loss": 0.0193,
"step": 610
},
{
"epoch": 0.7399979945853805,
"grad_norm": 0.017876917503246166,
"learning_rate": 1.9250077799102323e-06,
"loss": 0.0197,
"step": 615
},
{
"epoch": 0.7460142384437982,
"grad_norm": 0.01737881401071936,
"learning_rate": 1.842787596593576e-06,
"loss": 0.02,
"step": 620
},
{
"epoch": 0.752030482302216,
"grad_norm": 0.01669399114670848,
"learning_rate": 1.761963416264329e-06,
"loss": 0.0198,
"step": 625
},
{
"epoch": 0.7580467261606337,
"grad_norm": 0.01963624059543007,
"learning_rate": 1.6825709763968112e-06,
"loss": 0.0193,
"step": 630
},
{
"epoch": 0.7640629700190514,
"grad_norm": 0.016744844031857532,
"learning_rate": 1.6046453814024671e-06,
"loss": 0.0194,
"step": 635
},
{
"epoch": 0.7700792138774691,
"grad_norm": 0.015338845138862633,
"learning_rate": 1.5282210871079929e-06,
"loss": 0.0192,
"step": 640
},
{
"epoch": 0.7760954577358868,
"grad_norm": 0.014792090312693856,
"learning_rate": 1.453331885520234e-06,
"loss": 0.0164,
"step": 645
},
{
"epoch": 0.7821117015943047,
"grad_norm": 0.01431108236231679,
"learning_rate": 1.3800108898846022e-06,
"loss": 0.0193,
"step": 650
},
{
"epoch": 0.7881279454527224,
"grad_norm": 0.01517595387399891,
"learning_rate": 1.3082905200436291e-06,
"loss": 0.0208,
"step": 655
},
{
"epoch": 0.7941441893111401,
"grad_norm": 0.01585759719142669,
"learning_rate": 1.2382024881020937e-06,
"loss": 0.019,
"step": 660
},
{
"epoch": 0.8001604331695578,
"grad_norm": 0.01823159719823014,
"learning_rate": 1.1697777844051105e-06,
"loss": 0.0198,
"step": 665
},
{
"epoch": 0.8061766770279756,
"grad_norm": 0.013515790000071682,
"learning_rate": 1.1030466638353293e-06,
"loss": 0.0187,
"step": 670
},
{
"epoch": 0.8121929208863933,
"grad_norm": 0.016225422506154382,
"learning_rate": 1.0380386324353508e-06,
"loss": 0.0175,
"step": 675
},
{
"epoch": 0.818209164744811,
"grad_norm": 0.018290959272687618,
"learning_rate": 9.74782434361234e-07,
"loss": 0.0191,
"step": 680
},
{
"epoch": 0.8242254086032287,
"grad_norm": 0.014122550894005206,
"learning_rate": 9.133060391728965e-07,
"loss": 0.0186,
"step": 685
},
{
"epoch": 0.8302416524616465,
"grad_norm": 0.01479861703335419,
"learning_rate": 8.536366294669979e-07,
"loss": 0.017,
"step": 690
},
{
"epoch": 0.8362578963200642,
"grad_norm": 0.01704145841348508,
"learning_rate": 7.958005888578063e-07,
"loss": 0.0189,
"step": 695
},
{
"epoch": 0.8422741401784819,
"grad_norm": 0.014330474670880548,
"learning_rate": 7.398234903113266e-07,
"loss": 0.0177,
"step": 700
},
{
"epoch": 0.8482903840368996,
"grad_norm": 0.016122466793252455,
"learning_rate": 6.857300848378857e-07,
"loss": 0.0186,
"step": 705
},
{
"epoch": 0.8543066278953173,
"grad_norm": 0.016531884356419155,
"learning_rate": 6.335442905481442e-07,
"loss": 0.0183,
"step": 710
},
{
"epoch": 0.8603228717537351,
"grad_norm": 0.017990075667932776,
"learning_rate": 5.832891820773868e-07,
"loss": 0.0201,
"step": 715
},
{
"epoch": 0.8663391156121528,
"grad_norm": 0.01787947586996767,
"learning_rate": 5.349869803827717e-07,
"loss": 0.0168,
"step": 720
},
{
"epoch": 0.8723553594705705,
"grad_norm": 0.016104542603383502,
"learning_rate": 4.886590429180426e-07,
"loss": 0.0179,
"step": 725
},
{
"epoch": 0.8783716033289882,
"grad_norm": 0.01865954701869269,
"learning_rate": 4.443258541900508e-07,
"loss": 0.0179,
"step": 730
},
{
"epoch": 0.884387847187406,
"grad_norm": 0.014772814789547404,
"learning_rate": 4.020070167012541e-07,
"loss": 0.0166,
"step": 735
},
{
"epoch": 0.8904040910458237,
"grad_norm": 0.0189883024943083,
"learning_rate": 3.6172124228221914e-07,
"loss": 0.0185,
"step": 740
},
{
"epoch": 0.8964203349042414,
"grad_norm": 0.01755215014494978,
"learning_rate": 3.23486343817942e-07,
"loss": 0.0174,
"step": 745
},
{
"epoch": 0.9024365787626591,
"grad_norm": 0.021255269979833603,
"learning_rate": 2.873192273716369e-07,
"loss": 0.0198,
"step": 750
},
{
"epoch": 0.908452822621077,
"grad_norm": 0.018993801870159418,
"learning_rate": 2.532358847095051e-07,
"loss": 0.0181,
"step": 755
},
{
"epoch": 0.9144690664794947,
"grad_norm": 0.01729608956020829,
"learning_rate": 2.2125138622976494e-07,
"loss": 0.019,
"step": 760
},
{
"epoch": 0.9204853103379124,
"grad_norm": 0.015801337315760878,
"learning_rate": 1.9137987429907635e-07,
"loss": 0.0168,
"step": 765
},
{
"epoch": 0.9265015541963301,
"grad_norm": 0.016592823438343365,
"learning_rate": 1.636345569993042e-07,
"loss": 0.0172,
"step": 770
},
{
"epoch": 0.9325177980547478,
"grad_norm": 0.013326130117293207,
"learning_rate": 1.3802770228739547e-07,
"loss": 0.0162,
"step": 775
},
{
"epoch": 0.9385340419131656,
"grad_norm": 0.017403164818775768,
"learning_rate": 1.1457063257093892e-07,
"loss": 0.0181,
"step": 780
},
{
"epoch": 0.9445502857715833,
"grad_norm": 0.01717367430718781,
"learning_rate": 9.32737197018152e-08,
"loss": 0.0193,
"step": 785
},
{
"epoch": 0.950566529630001,
"grad_norm": 0.01932159740421986,
"learning_rate": 7.414638039014266e-08,
"loss": 0.0189,
"step": 790
},
{
"epoch": 0.9565827734884187,
"grad_norm": 0.014545477047193084,
"learning_rate": 5.7197072040557356e-08,
"loss": 0.0175,
"step": 795
},
{
"epoch": 0.9625990173468365,
"grad_norm": 0.01471817818854388,
"learning_rate": 4.243328901266219e-08,
"loss": 0.0184,
"step": 800
},
{
"epoch": 0.9686152612052542,
"grad_norm": 0.016067984603921493,
"learning_rate": 2.986155930729484e-08,
"loss": 0.0163,
"step": 805
},
{
"epoch": 0.9746315050636719,
"grad_norm": 0.014988902741589531,
"learning_rate": 1.9487441680084983e-08,
"loss": 0.0174,
"step": 810
},
{
"epoch": 0.9806477489220896,
"grad_norm": 0.016141274215466517,
"learning_rate": 1.1315523183581534e-08,
"loss": 0.0186,
"step": 815
},
{
"epoch": 0.9866639927805073,
"grad_norm": 0.016494560923804934,
"learning_rate": 5.349417139022816e-09,
"loss": 0.0186,
"step": 820
},
{
"epoch": 0.9926802366389251,
"grad_norm": 0.019570420674046143,
"learning_rate": 1.591761538662362e-09,
"loss": 0.0203,
"step": 825
},
{
"epoch": 0.9986964804973428,
"grad_norm": 0.014921909514844484,
"learning_rate": 4.4217879344166104e-11,
"loss": 0.0162,
"step": 830
},
{
"epoch": 0.9998997292690264,
"step": 831,
"total_flos": 4.483822414295204e+18,
"train_loss": 0.043213658636630875,
"train_runtime": 13633.5754,
"train_samples_per_second": 2.926,
"train_steps_per_second": 0.061
}
],
"logging_steps": 5,
"max_steps": 831,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.483822414295204e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}