{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 2.996770721205597,
"eval_steps": 500,
"global_step": 1392,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.021528525296017224,
"grad_norm": 1.8190886974334717,
"learning_rate": 2.9996179993481906e-05,
"loss": 0.264,
"step": 10
},
{
"epoch": 0.04305705059203445,
"grad_norm": 4.23043966293335,
"learning_rate": 2.9984721919587606e-05,
"loss": 0.1028,
"step": 20
},
{
"epoch": 0.06458557588805167,
"grad_norm": 1.842679738998413,
"learning_rate": 2.996563161430602e-05,
"loss": 0.114,
"step": 30
},
{
"epoch": 0.0861141011840689,
"grad_norm": 4.223649978637695,
"learning_rate": 2.9938918800982563e-05,
"loss": 0.0948,
"step": 40
},
{
"epoch": 0.10764262648008611,
"grad_norm": 2.1200666427612305,
"learning_rate": 2.9904597085366708e-05,
"loss": 0.1096,
"step": 50
},
{
"epoch": 0.12917115177610333,
"grad_norm": 2.793856143951416,
"learning_rate": 2.9862683948682103e-05,
"loss": 0.0956,
"step": 60
},
{
"epoch": 0.15069967707212056,
"grad_norm": 1.9462778568267822,
"learning_rate": 2.9813200738722784e-05,
"loss": 0.1017,
"step": 70
},
{
"epoch": 0.1722282023681378,
"grad_norm": 2.255049228668213,
"learning_rate": 2.975617265898004e-05,
"loss": 0.0694,
"step": 80
},
{
"epoch": 0.193756727664155,
"grad_norm": 1.4251642227172852,
"learning_rate": 2.9691628755805377e-05,
"loss": 0.069,
"step": 90
},
{
"epoch": 0.21528525296017223,
"grad_norm": 1.512846827507019,
"learning_rate": 2.961960190361624e-05,
"loss": 0.0861,
"step": 100
},
{
"epoch": 0.23681377825618946,
"grad_norm": 1.1422572135925293,
"learning_rate": 2.9540128788151935e-05,
"loss": 0.0829,
"step": 110
},
{
"epoch": 0.25834230355220666,
"grad_norm": 3.0731289386749268,
"learning_rate": 2.9453249887788343e-05,
"loss": 0.0811,
"step": 120
},
{
"epoch": 0.2798708288482239,
"grad_norm": 3.031052350997925,
"learning_rate": 2.9359009452920893e-05,
"loss": 0.0762,
"step": 130
},
{
"epoch": 0.3013993541442411,
"grad_norm": 2.248966932296753,
"learning_rate": 2.925745548342631e-05,
"loss": 0.0835,
"step": 140
},
{
"epoch": 0.32292787944025836,
"grad_norm": 0.9142462611198425,
"learning_rate": 2.9148639704214645e-05,
"loss": 0.074,
"step": 150
},
{
"epoch": 0.3444564047362756,
"grad_norm": 2.3527843952178955,
"learning_rate": 2.9032617538884018e-05,
"loss": 0.0674,
"step": 160
},
{
"epoch": 0.36598493003229277,
"grad_norm": 2.349313259124756,
"learning_rate": 2.890944808149146e-05,
"loss": 0.0934,
"step": 170
},
{
"epoch": 0.38751345532831,
"grad_norm": 0.6645804643630981,
"learning_rate": 2.877919406645433e-05,
"loss": 0.0759,
"step": 180
},
{
"epoch": 0.40904198062432723,
"grad_norm": 1.5764023065567017,
"learning_rate": 2.864192183659747e-05,
"loss": 0.0725,
"step": 190
},
{
"epoch": 0.43057050592034446,
"grad_norm": 2.184178590774536,
"learning_rate": 2.84977013093626e-05,
"loss": 0.0542,
"step": 200
},
{
"epoch": 0.4520990312163617,
"grad_norm": 1.8497698307037354,
"learning_rate": 2.8346605941196927e-05,
"loss": 0.0837,
"step": 210
},
{
"epoch": 0.4736275565123789,
"grad_norm": 1.5373315811157227,
"learning_rate": 2.818871269013928e-05,
"loss": 0.0717,
"step": 220
},
{
"epoch": 0.4951560818083961,
"grad_norm": 1.3783589601516724,
"learning_rate": 2.8024101976622762e-05,
"loss": 0.0577,
"step": 230
},
{
"epoch": 0.5166846071044133,
"grad_norm": 4.914410591125488,
"learning_rate": 2.7852857642513838e-05,
"loss": 0.0705,
"step": 240
},
{
"epoch": 0.5382131324004306,
"grad_norm": 0.8398504853248596,
"learning_rate": 2.7675066908408852e-05,
"loss": 0.0716,
"step": 250
},
{
"epoch": 0.5597416576964478,
"grad_norm": 1.0903675556182861,
"learning_rate": 2.7490820329209546e-05,
"loss": 0.08,
"step": 260
},
{
"epoch": 0.581270182992465,
"grad_norm": 1.7572460174560547,
"learning_rate": 2.7300211748000386e-05,
"loss": 0.0741,
"step": 270
},
{
"epoch": 0.6027987082884823,
"grad_norm": 1.668867588043213,
"learning_rate": 2.7103338248251055e-05,
"loss": 0.0631,
"step": 280
},
{
"epoch": 0.6243272335844995,
"grad_norm": 1.9639641046524048,
"learning_rate": 2.6900300104368527e-05,
"loss": 0.0802,
"step": 290
},
{
"epoch": 0.6458557588805167,
"grad_norm": 1.3819113969802856,
"learning_rate": 2.6691200730623874e-05,
"loss": 0.0647,
"step": 300
},
{
"epoch": 0.667384284176534,
"grad_norm": 1.6586377620697021,
"learning_rate": 2.6476146628479847e-05,
"loss": 0.0626,
"step": 310
},
{
"epoch": 0.6889128094725512,
"grad_norm": 0.7640856504440308,
"learning_rate": 2.6255247332346036e-05,
"loss": 0.0717,
"step": 320
},
{
"epoch": 0.7104413347685683,
"grad_norm": 0.8930771350860596,
"learning_rate": 2.602861535378925e-05,
"loss": 0.0617,
"step": 330
},
{
"epoch": 0.7319698600645855,
"grad_norm": 0.9496339559555054,
"learning_rate": 2.5796366124227532e-05,
"loss": 0.0672,
"step": 340
},
{
"epoch": 0.7534983853606028,
"grad_norm": 3.019853115081787,
"learning_rate": 2.5558617936136984e-05,
"loss": 0.0702,
"step": 350
},
{
"epoch": 0.77502691065662,
"grad_norm": 0.9336963295936584,
"learning_rate": 2.531549188280135e-05,
"loss": 0.0697,
"step": 360
},
{
"epoch": 0.7965554359526372,
"grad_norm": 0.7075727581977844,
"learning_rate": 2.50671117966351e-05,
"loss": 0.074,
"step": 370
},
{
"epoch": 0.8180839612486545,
"grad_norm": 0.5153305530548096,
"learning_rate": 2.481360418611132e-05,
"loss": 0.0566,
"step": 380
},
{
"epoch": 0.8396124865446717,
"grad_norm": 0.5062828660011292,
"learning_rate": 2.4555098171326616e-05,
"loss": 0.0792,
"step": 390
},
{
"epoch": 0.8611410118406889,
"grad_norm": 1.255761742591858,
"learning_rate": 2.4291725418235848e-05,
"loss": 0.0445,
"step": 400
},
{
"epoch": 0.8826695371367062,
"grad_norm": 0.9719372391700745,
"learning_rate": 2.4023620071590147e-05,
"loss": 0.0553,
"step": 410
},
{
"epoch": 0.9041980624327234,
"grad_norm": 1.868668794631958,
"learning_rate": 2.3750918686612414e-05,
"loss": 0.0555,
"step": 420
},
{
"epoch": 0.9257265877287406,
"grad_norm": 0.34430617094039917,
"learning_rate": 2.3473760159445058e-05,
"loss": 0.0611,
"step": 430
},
{
"epoch": 0.9472551130247578,
"grad_norm": 1.189942717552185,
"learning_rate": 2.3192285656405456e-05,
"loss": 0.0571,
"step": 440
},
{
"epoch": 0.9687836383207751,
"grad_norm": 0.5107014179229736,
"learning_rate": 2.2906638542085117e-05,
"loss": 0.0635,
"step": 450
},
{
"epoch": 0.9903121636167922,
"grad_norm": 0.685809850692749,
"learning_rate": 2.2616964306329183e-05,
"loss": 0.0584,
"step": 460
},
{
"epoch": 1.0118406889128095,
"grad_norm": 3.305742025375366,
"learning_rate": 2.2323410490133485e-05,
"loss": 0.0569,
"step": 470
},
{
"epoch": 1.0333692142088267,
"grad_norm": 1.87465500831604,
"learning_rate": 2.2026126610496852e-05,
"loss": 0.0481,
"step": 480
},
{
"epoch": 1.054897739504844,
"grad_norm": 0.7248936295509338,
"learning_rate": 2.172526408426702e-05,
"loss": 0.0295,
"step": 490
},
{
"epoch": 1.0764262648008611,
"grad_norm": 0.670519232749939,
"learning_rate": 2.1420976151018813e-05,
"loss": 0.0385,
"step": 500
},
{
"epoch": 1.0979547900968785,
"grad_norm": 1.4730095863342285,
"learning_rate": 2.1113417795004016e-05,
"loss": 0.063,
"step": 510
},
{
"epoch": 1.1194833153928956,
"grad_norm": 1.3478758335113525,
"learning_rate": 2.0802745666212592e-05,
"loss": 0.0528,
"step": 520
},
{
"epoch": 1.141011840688913,
"grad_norm": 0.6316215991973877,
"learning_rate": 2.048911800058546e-05,
"loss": 0.0347,
"step": 530
},
{
"epoch": 1.16254036598493,
"grad_norm": 1.4956326484680176,
"learning_rate": 2.0172694539419557e-05,
"loss": 0.049,
"step": 540
},
{
"epoch": 1.1840688912809472,
"grad_norm": 1.1988089084625244,
"learning_rate": 1.9853636448006094e-05,
"loss": 0.0471,
"step": 550
},
{
"epoch": 1.2055974165769645,
"grad_norm": 1.2572044134140015,
"learning_rate": 1.953210623354359e-05,
"loss": 0.06,
"step": 560
},
{
"epoch": 1.2271259418729816,
"grad_norm": 0.7759698033332825,
"learning_rate": 1.9208267662367378e-05,
"loss": 0.043,
"step": 570
},
{
"epoch": 1.248654467168999,
"grad_norm": 1.9407209157943726,
"learning_rate": 1.888228567653781e-05,
"loss": 0.051,
"step": 580
},
{
"epoch": 1.270182992465016,
"grad_norm": 1.0966278314590454,
"learning_rate": 1.8554326309829654e-05,
"loss": 0.0359,
"step": 590
},
{
"epoch": 1.2917115177610334,
"grad_norm": 2.063629150390625,
"learning_rate": 1.8224556603165363e-05,
"loss": 0.0484,
"step": 600
},
{
"epoch": 1.3132400430570506,
"grad_norm": 1.6178653240203857,
"learning_rate": 1.7893144519535468e-05,
"loss": 0.045,
"step": 610
},
{
"epoch": 1.334768568353068,
"grad_norm": 0.26466497778892517,
"learning_rate": 1.7560258858449248e-05,
"loss": 0.0528,
"step": 620
},
{
"epoch": 1.356297093649085,
"grad_norm": 1.890158772468567,
"learning_rate": 1.7226069169959393e-05,
"loss": 0.0527,
"step": 630
},
{
"epoch": 1.3778256189451024,
"grad_norm": 1.3726129531860352,
"learning_rate": 1.689074566830434e-05,
"loss": 0.0389,
"step": 640
},
{
"epoch": 1.3993541442411195,
"grad_norm": 1.0230239629745483,
"learning_rate": 1.655445914521236e-05,
"loss": 0.0506,
"step": 650
},
{
"epoch": 1.4208826695371366,
"grad_norm": 0.8005169630050659,
"learning_rate": 1.621738088291147e-05,
"loss": 0.0455,
"step": 660
},
{
"epoch": 1.442411194833154,
"grad_norm": 1.1895893812179565,
"learning_rate": 1.587968256688955e-05,
"loss": 0.039,
"step": 670
},
{
"epoch": 1.4639397201291713,
"grad_norm": 1.9981929063796997,
"learning_rate": 1.5541536198449044e-05,
"loss": 0.0512,
"step": 680
},
{
"epoch": 1.4854682454251884,
"grad_norm": 1.5658233165740967,
"learning_rate": 1.5203114007100828e-05,
"loss": 0.0263,
"step": 690
},
{
"epoch": 1.5069967707212055,
"grad_norm": 2.838642120361328,
"learning_rate": 1.4864588362841808e-05,
"loss": 0.0481,
"step": 700
},
{
"epoch": 1.5285252960172229,
"grad_norm": 0.6982723474502563,
"learning_rate": 1.4526131688360996e-05,
"loss": 0.0417,
"step": 710
},
{
"epoch": 1.55005382131324,
"grad_norm": 1.7505388259887695,
"learning_rate": 1.4187916371218739e-05,
"loss": 0.0486,
"step": 720
},
{
"epoch": 1.571582346609257,
"grad_norm": 2.41610050201416,
"learning_rate": 1.3850114676043837e-05,
"loss": 0.0249,
"step": 730
},
{
"epoch": 1.5931108719052745,
"grad_norm": 1.3201218843460083,
"learning_rate": 1.3512898656793283e-05,
"loss": 0.042,
"step": 740
},
{
"epoch": 1.6146393972012918,
"grad_norm": 0.9440786838531494,
"learning_rate": 1.3176440069119275e-05,
"loss": 0.0592,
"step": 750
},
{
"epoch": 1.636167922497309,
"grad_norm": 0.5338843464851379,
"learning_rate": 1.2840910282888211e-05,
"loss": 0.0405,
"step": 760
},
{
"epoch": 1.657696447793326,
"grad_norm": 1.0818413496017456,
"learning_rate": 1.2506480194896155e-05,
"loss": 0.0508,
"step": 770
},
{
"epoch": 1.6792249730893434,
"grad_norm": 1.209283471107483,
"learning_rate": 1.2173320141825232e-05,
"loss": 0.0342,
"step": 780
},
{
"epoch": 1.7007534983853607,
"grad_norm": 2.5324923992156982,
"learning_rate": 1.1841599813485341e-05,
"loss": 0.046,
"step": 790
},
{
"epoch": 1.7222820236813778,
"grad_norm": 1.514676809310913,
"learning_rate": 1.1511488166385349e-05,
"loss": 0.0348,
"step": 800
},
{
"epoch": 1.743810548977395,
"grad_norm": 1.4090155363082886,
"learning_rate": 1.1183153337677734e-05,
"loss": 0.0455,
"step": 810
},
{
"epoch": 1.7653390742734123,
"grad_norm": 2.2600796222686768,
"learning_rate": 1.0856762559520605e-05,
"loss": 0.0542,
"step": 820
},
{
"epoch": 1.7868675995694296,
"grad_norm": 1.2120071649551392,
"learning_rate": 1.0532482073900628e-05,
"loss": 0.0323,
"step": 830
},
{
"epoch": 1.8083961248654468,
"grad_norm": 1.3877032995224,
"learning_rate": 1.0210477047960303e-05,
"loss": 0.0456,
"step": 840
},
{
"epoch": 1.8299246501614639,
"grad_norm": 0.9278028607368469,
"learning_rate": 9.89091148987269e-06,
"loss": 0.037,
"step": 850
},
{
"epoch": 1.8514531754574812,
"grad_norm": 2.1230030059814453,
"learning_rate": 9.573948165306438e-06,
"loss": 0.0452,
"step": 860
},
{
"epoch": 1.8729817007534983,
"grad_norm": 0.6858197450637817,
"learning_rate": 9.259748514523654e-06,
"loss": 0.0536,
"step": 870
},
{
"epoch": 1.8945102260495155,
"grad_norm": 1.1023917198181152,
"learning_rate": 8.948472570152874e-06,
"loss": 0.0553,
"step": 880
},
{
"epoch": 1.9160387513455328,
"grad_norm": 0.5614004731178284,
"learning_rate": 8.64027887567895e-06,
"loss": 0.0479,
"step": 890
},
{
"epoch": 1.9375672766415502,
"grad_norm": 1.0492910146713257,
"learning_rate": 8.33532440469145e-06,
"loss": 0.0438,
"step": 900
},
{
"epoch": 1.9590958019375673,
"grad_norm": 0.30423790216445923,
"learning_rate": 8.033764480932616e-06,
"loss": 0.028,
"step": 910
},
{
"epoch": 1.9806243272335844,
"grad_norm": 1.6426568031311035,
"learning_rate": 7.735752699185711e-06,
"loss": 0.0574,
"step": 920
},
{
"epoch": 2.0021528525296017,
"grad_norm": 1.1288621425628662,
"learning_rate": 7.441440847043883e-06,
"loss": 0.0255,
"step": 930
},
{
"epoch": 2.023681377825619,
"grad_norm": 0.26666760444641113,
"learning_rate": 7.150978827599619e-06,
"loss": 0.028,
"step": 940
},
{
"epoch": 2.045209903121636,
"grad_norm": 0.33629775047302246,
"learning_rate": 6.864514583093911e-06,
"loss": 0.0178,
"step": 950
},
{
"epoch": 2.0667384284176533,
"grad_norm": 0.4371579885482788,
"learning_rate": 6.582194019564266e-06,
"loss": 0.0197,
"step": 960
},
{
"epoch": 2.0882669537136707,
"grad_norm": 1.305396318435669,
"learning_rate": 6.304160932529721e-06,
"loss": 0.03,
"step": 970
},
{
"epoch": 2.109795479009688,
"grad_norm": 6.668363571166992,
"learning_rate": 6.0305569337509225e-06,
"loss": 0.0309,
"step": 980
},
{
"epoch": 2.131324004305705,
"grad_norm": 1.8910939693450928,
"learning_rate": 5.761521379102343e-06,
"loss": 0.0262,
"step": 990
},
{
"epoch": 2.1528525296017222,
"grad_norm": 1.481408953666687,
"learning_rate": 5.497191297593647e-06,
"loss": 0.0337,
"step": 1000
},
{
"epoch": 2.1743810548977396,
"grad_norm": 1.0818077325820923,
"learning_rate": 5.237701321576063e-06,
"loss": 0.0365,
"step": 1010
},
{
"epoch": 2.195909580193757,
"grad_norm": 1.0381739139556885,
"learning_rate": 4.98318361816957e-06,
"loss": 0.0228,
"step": 1020
},
{
"epoch": 2.217438105489774,
"grad_norm": 0.31783393025398254,
"learning_rate": 4.733767821945621e-06,
"loss": 0.0278,
"step": 1030
},
{
"epoch": 2.238966630785791,
"grad_norm": 2.5186619758605957,
"learning_rate": 4.4895809688998655e-06,
"loss": 0.0302,
"step": 1040
},
{
"epoch": 2.2604951560818085,
"grad_norm": 0.6198469400405884,
"learning_rate": 4.25074743174833e-06,
"loss": 0.0138,
"step": 1050
},
{
"epoch": 2.282023681377826,
"grad_norm": 0.8775982856750488,
"learning_rate": 4.017388856580178e-06,
"loss": 0.0218,
"step": 1060
},
{
"epoch": 2.3035522066738428,
"grad_norm": 0.4356814920902252,
"learning_rate": 3.7896241008991596e-06,
"loss": 0.0284,
"step": 1070
},
{
"epoch": 2.32508073196986,
"grad_norm": 1.0270265340805054,
"learning_rate": 3.567569173085455e-06,
"loss": 0.0169,
"step": 1080
},
{
"epoch": 2.3466092572658774,
"grad_norm": 1.2356810569763184,
"learning_rate": 3.351337173308607e-06,
"loss": 0.0145,
"step": 1090
},
{
"epoch": 2.3681377825618943,
"grad_norm": 0.17152564227581024,
"learning_rate": 3.1410382359217645e-06,
"loss": 0.0249,
"step": 1100
},
{
"epoch": 2.3896663078579117,
"grad_norm": 0.13272231817245483,
"learning_rate": 2.9367794733664637e-06,
"loss": 0.0296,
"step": 1110
},
{
"epoch": 2.411194833153929,
"grad_norm": 2.4926042556762695,
"learning_rate": 2.7386649216166233e-06,
"loss": 0.031,
"step": 1120
},
{
"epoch": 2.4327233584499464,
"grad_norm": 0.5246890783309937,
"learning_rate": 2.546795487189436e-06,
"loss": 0.0294,
"step": 1130
},
{
"epoch": 2.4542518837459633,
"grad_norm": 1.739809513092041,
"learning_rate": 2.361268895750264e-06,
"loss": 0.0352,
"step": 1140
},
{
"epoch": 2.4757804090419806,
"grad_norm": 0.07230955362319946,
"learning_rate": 2.1821796423375766e-06,
"loss": 0.0177,
"step": 1150
},
{
"epoch": 2.497308934337998,
"grad_norm": 1.795920491218567,
"learning_rate": 2.0096189432334194e-06,
"loss": 0.032,
"step": 1160
},
{
"epoch": 2.518837459634015,
"grad_norm": 0.4120383560657501,
"learning_rate": 1.843674689503846e-06,
"loss": 0.0244,
"step": 1170
},
{
"epoch": 2.540365984930032,
"grad_norm": 1.3315762281417847,
"learning_rate": 1.6844314022329676e-06,
"loss": 0.0126,
"step": 1180
},
{
"epoch": 2.5618945102260495,
"grad_norm": 0.9914199709892273,
"learning_rate": 1.5319701894735023e-06,
"loss": 0.022,
"step": 1190
},
{
"epoch": 2.583423035522067,
"grad_norm": 0.9357948303222656,
"learning_rate": 1.3863687049356465e-06,
"loss": 0.0181,
"step": 1200
},
{
"epoch": 2.604951560818084,
"grad_norm": 2.104593515396118,
"learning_rate": 1.247701108435394e-06,
"loss": 0.0241,
"step": 1210
},
{
"epoch": 2.626480086114101,
"grad_norm": 1.0621205568313599,
"learning_rate": 1.116038028122413e-06,
"loss": 0.0292,
"step": 1220
},
{
"epoch": 2.6480086114101185,
"grad_norm": 1.7859629392623901,
"learning_rate": 9.914465245067022e-07,
"loss": 0.0201,
"step": 1230
},
{
"epoch": 2.669537136706136,
"grad_norm": 1.8932825326919556,
"learning_rate": 8.7399005630238e-07,
"loss": 0.0313,
"step": 1240
},
{
"epoch": 2.6910656620021527,
"grad_norm": 1.2083765268325806,
"learning_rate": 7.637284481059998e-07,
"loss": 0.0311,
"step": 1250
},
{
"epoch": 2.71259418729817,
"grad_norm": 0.1731128990650177,
"learning_rate": 6.607178599258268e-07,
"loss": 0.0134,
"step": 1260
},
{
"epoch": 2.7341227125941874,
"grad_norm": 1.8263607025146484,
"learning_rate": 5.650107585776348e-07,
"loss": 0.0348,
"step": 1270
},
{
"epoch": 2.7556512378902047,
"grad_norm": 1.52913498878479,
"learning_rate": 4.766558909615504e-07,
"loss": 0.0238,
"step": 1280
},
{
"epoch": 2.7771797631862216,
"grad_norm": 1.0334974527359009,
"learning_rate": 3.9569825923360503e-07,
"loss": 0.0285,
"step": 1290
},
{
"epoch": 2.798708288482239,
"grad_norm": 0.5131074786186218,
"learning_rate": 3.22179097884579e-07,
"loss": 0.0284,
"step": 1300
},
{
"epoch": 2.8202368137782563,
"grad_norm": 0.869399905204773,
"learning_rate": 2.5613585273788264e-07,
"loss": 0.0312,
"step": 1310
},
{
"epoch": 2.841765339074273,
"grad_norm": 1.1290533542633057,
"learning_rate": 1.9760216187710788e-07,
"loss": 0.0259,
"step": 1320
},
{
"epoch": 2.8632938643702905,
"grad_norm": 0.23688088357448578,
"learning_rate": 1.4660783851300318e-07,
"loss": 0.0263,
"step": 1330
},
{
"epoch": 2.884822389666308,
"grad_norm": 1.1585010290145874,
"learning_rate": 1.0317885579858522e-07,
"loss": 0.0175,
"step": 1340
},
{
"epoch": 2.9063509149623252,
"grad_norm": 0.5305848717689514,
"learning_rate": 6.733733360012761e-08,
"loss": 0.0379,
"step": 1350
},
{
"epoch": 2.9278794402583426,
"grad_norm": 1.1688823699951172,
"learning_rate": 3.910152723075322e-08,
"loss": 0.0401,
"step": 1360
},
{
"epoch": 2.9494079655543595,
"grad_norm": 1.1842941045761108,
"learning_rate": 1.848581815237671e-08,
"loss": 0.0174,
"step": 1370
},
{
"epoch": 2.970936490850377,
"grad_norm": 0.9176095724105835,
"learning_rate": 5.50070665074065e-09,
"loss": 0.0218,
"step": 1380
},
{
"epoch": 2.9924650161463937,
"grad_norm": 2.5070579051971436,
"learning_rate": 1.5280648725357615e-10,
"loss": 0.0288,
"step": 1390
},
{
"epoch": 2.996770721205597,
"step": 1392,
"total_flos": 2.2176668825577062e+17,
"train_loss": 0.04948104658677917,
"train_runtime": 1703.925,
"train_samples_per_second": 6.543,
"train_steps_per_second": 0.817
}
],
"logging_steps": 10,
"max_steps": 1392,
"num_input_tokens_seen": 0,
"num_train_epochs": 3,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.2176668825577062e+17,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}