{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.1981819998002197,
"eval_steps": 500,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0079912096693637,
"grad_norm": 3.2677111625671387,
"learning_rate": 4e-05,
"loss": 1.8334,
"step": 10
},
{
"epoch": 0.0159824193387274,
"grad_norm": 2.2300846576690674,
"learning_rate": 8.444444444444444e-05,
"loss": 1.1382,
"step": 20
},
{
"epoch": 0.0239736290080911,
"grad_norm": 0.9214928150177002,
"learning_rate": 0.00012888888888888892,
"loss": 0.9079,
"step": 30
},
{
"epoch": 0.0319648386774548,
"grad_norm": 0.7646894454956055,
"learning_rate": 0.00017333333333333334,
"loss": 0.8226,
"step": 40
},
{
"epoch": 0.0399560483468185,
"grad_norm": 0.5976511240005493,
"learning_rate": 0.00019999627041039135,
"loss": 0.7371,
"step": 50
},
{
"epoch": 0.0479472580161822,
"grad_norm": 0.5931047797203064,
"learning_rate": 0.00019995431572214454,
"loss": 0.8082,
"step": 60
},
{
"epoch": 0.055938467685545896,
"grad_norm": 0.6012359857559204,
"learning_rate": 0.00019986576398242566,
"loss": 0.7508,
"step": 70
},
{
"epoch": 0.0639296773549096,
"grad_norm": 0.5190461277961731,
"learning_rate": 0.00019973065647259348,
"loss": 0.7647,
"step": 80
},
{
"epoch": 0.0719208870242733,
"grad_norm": 0.5790061354637146,
"learning_rate": 0.00019954905617753814,
"loss": 0.7418,
"step": 90
},
{
"epoch": 0.079912096693637,
"grad_norm": 0.428479939699173,
"learning_rate": 0.00019932104775631846,
"loss": 0.6965,
"step": 100
},
{
"epoch": 0.0879033063630007,
"grad_norm": 0.539878249168396,
"learning_rate": 0.00019904673750269537,
"loss": 0.7899,
"step": 110
},
{
"epoch": 0.0958945160323644,
"grad_norm": 0.5310686230659485,
"learning_rate": 0.00019872625329557953,
"loss": 0.6959,
"step": 120
},
{
"epoch": 0.1038857257017281,
"grad_norm": 0.4262120723724365,
"learning_rate": 0.0001983597445394162,
"loss": 0.7349,
"step": 130
},
{
"epoch": 0.11187693537109179,
"grad_norm": 0.4776265621185303,
"learning_rate": 0.00019794738209453545,
"loss": 0.7591,
"step": 140
},
{
"epoch": 0.1198681450404555,
"grad_norm": 0.3852095305919647,
"learning_rate": 0.00019748935819749987,
"loss": 0.6843,
"step": 150
},
{
"epoch": 0.1278593547098192,
"grad_norm": 0.534788191318512,
"learning_rate": 0.00019698588637148703,
"loss": 0.7827,
"step": 160
},
{
"epoch": 0.1358505643791829,
"grad_norm": 0.35920798778533936,
"learning_rate": 0.00019643720132674856,
"loss": 0.7002,
"step": 170
},
{
"epoch": 0.1438417740485466,
"grad_norm": 0.403860479593277,
"learning_rate": 0.00019584355885119196,
"loss": 0.7003,
"step": 180
},
{
"epoch": 0.1518329837179103,
"grad_norm": 0.5393890738487244,
"learning_rate": 0.00019520523569113677,
"loss": 0.6816,
"step": 190
},
{
"epoch": 0.159824193387274,
"grad_norm": 0.3763524293899536,
"learning_rate": 0.0001945225294222997,
"loss": 0.6774,
"step": 200
},
{
"epoch": 0.1678154030566377,
"grad_norm": 0.36979958415031433,
"learning_rate": 0.00019379575831106994,
"loss": 0.6983,
"step": 210
},
{
"epoch": 0.1758066127260014,
"grad_norm": 0.384091854095459,
"learning_rate": 0.00019302526116613864,
"loss": 0.7057,
"step": 220
},
{
"epoch": 0.1837978223953651,
"grad_norm": 0.45156919956207275,
"learning_rate": 0.0001922113971805517,
"loss": 0.7439,
"step": 230
},
{
"epoch": 0.1917890320647288,
"grad_norm": 0.4209638833999634,
"learning_rate": 0.0001913545457642601,
"loss": 0.7141,
"step": 240
},
{
"epoch": 0.1997802417340925,
"grad_norm": 0.5019676685333252,
"learning_rate": 0.0001904551063672452,
"loss": 0.7205,
"step": 250
},
{
"epoch": 0.2077714514034562,
"grad_norm": 0.43800297379493713,
"learning_rate": 0.00018951349829330168,
"loss": 0.7181,
"step": 260
},
{
"epoch": 0.2157626610728199,
"grad_norm": 0.40507107973098755,
"learning_rate": 0.0001885301605045651,
"loss": 0.7303,
"step": 270
},
{
"epoch": 0.22375387074218359,
"grad_norm": 0.3452669084072113,
"learning_rate": 0.000187505551416875,
"loss": 0.647,
"step": 280
},
{
"epoch": 0.2317450804115473,
"grad_norm": 0.3924757242202759,
"learning_rate": 0.00018644014868606895,
"loss": 0.6721,
"step": 290
},
{
"epoch": 0.239736290080911,
"grad_norm": 0.5480809807777405,
"learning_rate": 0.0001853344489853074,
"loss": 0.7755,
"step": 300
},
{
"epoch": 0.2477274997502747,
"grad_norm": 0.456853985786438,
"learning_rate": 0.0001841889677735327,
"loss": 0.7203,
"step": 310
},
{
"epoch": 0.2557187094196384,
"grad_norm": 0.40140455961227417,
"learning_rate": 0.0001830042390551708,
"loss": 0.643,
"step": 320
},
{
"epoch": 0.2637099190890021,
"grad_norm": 0.47574156522750854,
"learning_rate": 0.00018178081513118706,
"loss": 0.7128,
"step": 330
},
{
"epoch": 0.2717011287583658,
"grad_norm": 0.43806251883506775,
"learning_rate": 0.00018051926634161282,
"loss": 0.6922,
"step": 340
},
{
"epoch": 0.2796923384277295,
"grad_norm": 0.4530179500579834,
"learning_rate": 0.0001792201807996622,
"loss": 0.7907,
"step": 350
},
{
"epoch": 0.2876835480970932,
"grad_norm": 0.40421706438064575,
"learning_rate": 0.00017788416411756338,
"loss": 0.7358,
"step": 360
},
{
"epoch": 0.2956747577664569,
"grad_norm": 0.41535094380378723,
"learning_rate": 0.00017651183912423228,
"loss": 0.7031,
"step": 370
},
{
"epoch": 0.3036659674358206,
"grad_norm": 0.3584170341491699,
"learning_rate": 0.00017510384557492,
"loss": 0.7208,
"step": 380
},
{
"epoch": 0.3116571771051843,
"grad_norm": 0.42786943912506104,
"learning_rate": 0.00017366083985296947,
"loss": 0.7615,
"step": 390
},
{
"epoch": 0.319648386774548,
"grad_norm": 0.4445035457611084,
"learning_rate": 0.00017218349466382023,
"loss": 0.7002,
"step": 400
},
{
"epoch": 0.3276395964439117,
"grad_norm": 0.5019694566726685,
"learning_rate": 0.0001706724987214045,
"loss": 0.7204,
"step": 410
},
{
"epoch": 0.3356308061132754,
"grad_norm": 0.3719067871570587,
"learning_rate": 0.00016912855642708,
"loss": 0.6981,
"step": 420
},
{
"epoch": 0.3436220157826391,
"grad_norm": 0.47156500816345215,
"learning_rate": 0.00016755238754124965,
"loss": 0.6733,
"step": 430
},
{
"epoch": 0.3516132254520028,
"grad_norm": 0.4605729579925537,
"learning_rate": 0.0001659447268478212,
"loss": 0.74,
"step": 440
},
{
"epoch": 0.3596044351213665,
"grad_norm": 0.4625272750854492,
"learning_rate": 0.00016430632381166305,
"loss": 0.7508,
"step": 450
},
{
"epoch": 0.3675956447907302,
"grad_norm": 0.4062426686286926,
"learning_rate": 0.0001626379422292162,
"loss": 0.7178,
"step": 460
},
{
"epoch": 0.3755868544600939,
"grad_norm": 0.42503005266189575,
"learning_rate": 0.00016094035987242484,
"loss": 0.6757,
"step": 470
},
{
"epoch": 0.3835780641294576,
"grad_norm": 0.4469659924507141,
"learning_rate": 0.00015921436812615204,
"loss": 0.723,
"step": 480
},
{
"epoch": 0.3915692737988213,
"grad_norm": 0.3277670443058014,
"learning_rate": 0.00015746077161924905,
"loss": 0.7035,
"step": 490
},
{
"epoch": 0.399560483468185,
"grad_norm": 0.4804005026817322,
"learning_rate": 0.00015568038784945077,
"loss": 0.7347,
"step": 500
},
{
"epoch": 0.4075516931375487,
"grad_norm": 0.47554656863212585,
"learning_rate": 0.00015387404680227175,
"loss": 0.7332,
"step": 510
},
{
"epoch": 0.4155429028069124,
"grad_norm": 0.47048240900039673,
"learning_rate": 0.00015204259056408046,
"loss": 0.7516,
"step": 520
},
{
"epoch": 0.4235341124762761,
"grad_norm": 0.4335585832595825,
"learning_rate": 0.00015018687292953293,
"loss": 0.6726,
"step": 530
},
{
"epoch": 0.4315253221456398,
"grad_norm": 0.30512747168540955,
"learning_rate": 0.00014830775900354735,
"loss": 0.6954,
"step": 540
},
{
"epoch": 0.4395165318150035,
"grad_norm": 0.3644169867038727,
"learning_rate": 0.00014640612479800686,
"loss": 0.6699,
"step": 550
},
{
"epoch": 0.44750774148436717,
"grad_norm": 0.7686610221862793,
"learning_rate": 0.00014448285682337682,
"loss": 0.6825,
"step": 560
},
{
"epoch": 0.4554989511537309,
"grad_norm": 0.42735007405281067,
"learning_rate": 0.00014253885167542866,
"loss": 0.7192,
"step": 570
},
{
"epoch": 0.4634901608230946,
"grad_norm": 0.3812963664531708,
"learning_rate": 0.00014057501561726157,
"loss": 0.708,
"step": 580
},
{
"epoch": 0.4714813704924583,
"grad_norm": 0.3944273591041565,
"learning_rate": 0.0001385922641568175,
"loss": 0.7389,
"step": 590
},
{
"epoch": 0.479472580161822,
"grad_norm": 0.44416099786758423,
"learning_rate": 0.00013659152162008676,
"loss": 0.7025,
"step": 600
},
{
"epoch": 0.4874637898311857,
"grad_norm": 0.43281784653663635,
"learning_rate": 0.0001345737207202023,
"loss": 0.7012,
"step": 610
},
{
"epoch": 0.4954549995005494,
"grad_norm": 0.44126081466674805,
"learning_rate": 0.0001325398021226242,
"loss": 0.6811,
"step": 620
},
{
"epoch": 0.5034462091699131,
"grad_norm": 0.39465758204460144,
"learning_rate": 0.00013049071400661716,
"loss": 0.7229,
"step": 630
},
{
"epoch": 0.5114374188392768,
"grad_norm": 0.4265965223312378,
"learning_rate": 0.00012842741162322487,
"loss": 0.66,
"step": 640
},
{
"epoch": 0.5194286285086405,
"grad_norm": 0.3862599730491638,
"learning_rate": 0.00012635085684994767,
"loss": 0.7013,
"step": 650
},
{
"epoch": 0.5274198381780042,
"grad_norm": 0.5364603400230408,
"learning_rate": 0.00012426201774233135,
"loss": 0.7172,
"step": 660
},
{
"epoch": 0.5354110478473679,
"grad_norm": 0.5026273727416992,
"learning_rate": 0.00012216186808267546,
"loss": 0.7058,
"step": 670
},
{
"epoch": 0.5434022575167315,
"grad_norm": 0.39891964197158813,
"learning_rate": 0.0001200513869260721,
"loss": 0.7015,
"step": 680
},
{
"epoch": 0.5513934671860953,
"grad_norm": 0.45894622802734375,
"learning_rate": 0.00011793155814398674,
"loss": 0.753,
"step": 690
},
{
"epoch": 0.559384676855459,
"grad_norm": 0.34293729066848755,
"learning_rate": 0.00011580336996559343,
"loss": 0.6815,
"step": 700
},
{
"epoch": 0.5673758865248227,
"grad_norm": 0.4446139931678772,
"learning_rate": 0.00011366781451707879,
"loss": 0.6742,
"step": 710
},
{
"epoch": 0.5753670961941864,
"grad_norm": 0.48648640513420105,
"learning_rate": 0.0001115258873591291,
"loss": 0.6994,
"step": 720
},
{
"epoch": 0.5833583058635501,
"grad_norm": 0.41053032875061035,
"learning_rate": 0.00010937858702281631,
"loss": 0.7079,
"step": 730
},
{
"epoch": 0.5913495155329138,
"grad_norm": 0.4511827230453491,
"learning_rate": 0.00010722691454409943,
"loss": 0.708,
"step": 740
},
{
"epoch": 0.5993407252022775,
"grad_norm": 0.37551945447921753,
"learning_rate": 0.00010507187299715815,
"loss": 0.7,
"step": 750
},
{
"epoch": 0.6073319348716412,
"grad_norm": 0.38525089621543884,
"learning_rate": 0.00010291446702677599,
"loss": 0.6866,
"step": 760
},
{
"epoch": 0.6153231445410049,
"grad_norm": 0.3816082179546356,
"learning_rate": 0.0001007557023799917,
"loss": 0.7071,
"step": 770
},
{
"epoch": 0.6233143542103686,
"grad_norm": 0.48344865441322327,
"learning_rate": 9.859658543723659e-05,
"loss": 0.7181,
"step": 780
},
{
"epoch": 0.6313055638797322,
"grad_norm": 0.4207400977611542,
"learning_rate": 9.643812274317644e-05,
"loss": 0.7565,
"step": 790
},
{
"epoch": 0.639296773549096,
"grad_norm": 0.5104153752326965,
"learning_rate": 9.428132053747712e-05,
"loss": 0.7211,
"step": 800
},
{
"epoch": 0.6472879832184597,
"grad_norm": 0.40380504727363586,
"learning_rate": 9.212718428571231e-05,
"loss": 0.6808,
"step": 810
},
{
"epoch": 0.6552791928878234,
"grad_norm": 0.53224778175354,
"learning_rate": 8.997671821063191e-05,
"loss": 0.6786,
"step": 820
},
{
"epoch": 0.663270402557187,
"grad_norm": 0.42555850744247437,
"learning_rate": 8.783092482401005e-05,
"loss": 0.7767,
"step": 830
},
{
"epoch": 0.6712616122265508,
"grad_norm": 0.4147053360939026,
"learning_rate": 8.569080445929073e-05,
"loss": 0.6728,
"step": 840
},
{
"epoch": 0.6792528218959145,
"grad_norm": 0.3875350058078766,
"learning_rate": 8.355735480524874e-05,
"loss": 0.651,
"step": 850
},
{
"epoch": 0.6872440315652782,
"grad_norm": 0.36827635765075684,
"learning_rate": 8.143157044088377e-05,
"loss": 0.6989,
"step": 860
},
{
"epoch": 0.6952352412346419,
"grad_norm": 0.39672666788101196,
"learning_rate": 7.931444237176398e-05,
"loss": 0.6997,
"step": 870
},
{
"epoch": 0.7032264509040056,
"grad_norm": 0.4494044780731201,
"learning_rate": 7.72069575680357e-05,
"loss": 0.6977,
"step": 880
},
{
"epoch": 0.7112176605733693,
"grad_norm": 0.4261849522590637,
"learning_rate": 7.5110098504314e-05,
"loss": 0.7528,
"step": 890
},
{
"epoch": 0.719208870242733,
"grad_norm": 0.3963007926940918,
"learning_rate": 7.30248427016697e-05,
"loss": 0.7152,
"step": 900
},
{
"epoch": 0.7272000799120967,
"grad_norm": 0.346351683139801,
"learning_rate": 7.095216227192467e-05,
"loss": 0.6679,
"step": 910
},
{
"epoch": 0.7351912895814604,
"grad_norm": 0.32863008975982666,
"learning_rate": 6.889302346446969e-05,
"loss": 0.6647,
"step": 920
},
{
"epoch": 0.7431824992508241,
"grad_norm": 0.3735399544239044,
"learning_rate": 6.684838621581478e-05,
"loss": 0.6917,
"step": 930
},
{
"epoch": 0.7511737089201878,
"grad_norm": 0.5078648924827576,
"learning_rate": 6.481920370208274e-05,
"loss": 0.7392,
"step": 940
},
{
"epoch": 0.7591649185895515,
"grad_norm": 0.4455859065055847,
"learning_rate": 6.28064218946542e-05,
"loss": 0.6857,
"step": 950
},
{
"epoch": 0.7671561282589152,
"grad_norm": 0.41593629121780396,
"learning_rate": 6.0810979119171254e-05,
"loss": 0.676,
"step": 960
},
{
"epoch": 0.775147337928279,
"grad_norm": 0.3919152319431305,
"learning_rate": 5.883380561810563e-05,
"loss": 0.707,
"step": 970
},
{
"epoch": 0.7831385475976426,
"grad_norm": 0.34168556332588196,
"learning_rate": 5.6875823117095025e-05,
"loss": 0.6813,
"step": 980
},
{
"epoch": 0.7911297572670063,
"grad_norm": 0.3636936545372009,
"learning_rate": 5.493794439524979e-05,
"loss": 0.6822,
"step": 990
},
{
"epoch": 0.79912096693637,
"grad_norm": 0.38939976692199707,
"learning_rate": 5.302107285963045e-05,
"loss": 0.7016,
"step": 1000
},
{
"epoch": 0.8071121766057336,
"grad_norm": 0.4251338243484497,
"learning_rate": 5.1126102124094064e-05,
"loss": 0.662,
"step": 1010
},
{
"epoch": 0.8151033862750974,
"grad_norm": 0.4065021276473999,
"learning_rate": 4.9253915592706515e-05,
"loss": 0.6864,
"step": 1020
},
{
"epoch": 0.8230945959444611,
"grad_norm": 0.38323187828063965,
"learning_rate": 4.74053860479137e-05,
"loss": 0.6989,
"step": 1030
},
{
"epoch": 0.8310858056138248,
"grad_norm": 0.36701148748397827,
"learning_rate": 4.558137524366533e-05,
"loss": 0.7326,
"step": 1040
},
{
"epoch": 0.8390770152831885,
"grad_norm": 0.3849141299724579,
"learning_rate": 4.3782733503678886e-05,
"loss": 0.7265,
"step": 1050
},
{
"epoch": 0.8470682249525522,
"grad_norm": 0.42157188057899475,
"learning_rate": 4.2010299325033034e-05,
"loss": 0.6975,
"step": 1060
},
{
"epoch": 0.8550594346219159,
"grad_norm": 0.42658373713493347,
"learning_rate": 4.026489898727419e-05,
"loss": 0.6891,
"step": 1070
},
{
"epoch": 0.8630506442912796,
"grad_norm": 0.605895459651947,
"learning_rate": 3.854734616721852e-05,
"loss": 0.7375,
"step": 1080
},
{
"epoch": 0.8710418539606433,
"grad_norm": 0.40604451298713684,
"learning_rate": 3.6858441559629306e-05,
"loss": 0.7395,
"step": 1090
},
{
"epoch": 0.879033063630007,
"grad_norm": 0.3668944537639618,
"learning_rate": 3.519897250394612e-05,
"loss": 0.6727,
"step": 1100
},
{
"epoch": 0.8870242732993707,
"grad_norm": 0.42747315764427185,
"learning_rate": 3.3569712617240435e-05,
"loss": 0.6856,
"step": 1110
},
{
"epoch": 0.8950154829687343,
"grad_norm": 0.4526374936103821,
"learning_rate": 3.197142143356787e-05,
"loss": 0.6866,
"step": 1120
},
{
"epoch": 0.9030066926380981,
"grad_norm": 0.3767329454421997,
"learning_rate": 3.040484404988614e-05,
"loss": 0.667,
"step": 1130
},
{
"epoch": 0.9109979023074618,
"grad_norm": 0.4605715572834015,
"learning_rate": 2.8870710778703103e-05,
"loss": 0.7107,
"step": 1140
},
{
"epoch": 0.9189891119768255,
"grad_norm": 0.5346247553825378,
"learning_rate": 2.736973680761702e-05,
"loss": 0.7104,
"step": 1150
},
{
"epoch": 0.9269803216461892,
"grad_norm": 0.4917076528072357,
"learning_rate": 2.590262186590805e-05,
"loss": 0.7009,
"step": 1160
},
{
"epoch": 0.9349715313155529,
"grad_norm": 0.4159565269947052,
"learning_rate": 2.447004989833599e-05,
"loss": 0.6347,
"step": 1170
},
{
"epoch": 0.9429627409849166,
"grad_norm": 0.3852473795413971,
"learning_rate": 2.307268874629649e-05,
"loss": 0.7313,
"step": 1180
},
{
"epoch": 0.9509539506542803,
"grad_norm": 0.4686223268508911,
"learning_rate": 2.1711189836484314e-05,
"loss": 0.6376,
"step": 1190
},
{
"epoch": 0.958945160323644,
"grad_norm": 0.4293384552001953,
"learning_rate": 2.038618787720925e-05,
"loss": 0.686,
"step": 1200
},
{
"epoch": 0.9669363699930077,
"grad_norm": 0.49944257736206055,
"learning_rate": 1.9098300562505266e-05,
"loss": 0.7029,
"step": 1210
},
{
"epoch": 0.9749275796623714,
"grad_norm": 0.3824506998062134,
"learning_rate": 1.784812828417197e-05,
"loss": 0.7253,
"step": 1220
},
{
"epoch": 0.982918789331735,
"grad_norm": 0.3724282383918762,
"learning_rate": 1.663625385188182e-05,
"loss": 0.7033,
"step": 1230
},
{
"epoch": 0.9909099990010988,
"grad_norm": 0.47136667370796204,
"learning_rate": 1.5463242221483743e-05,
"loss": 0.66,
"step": 1240
},
{
"epoch": 0.9989012086704625,
"grad_norm": 0.42494097352027893,
"learning_rate": 1.432964023163028e-05,
"loss": 0.6818,
"step": 1250
},
{
"epoch": 1.006392967735491,
"grad_norm": 0.48029494285583496,
"learning_rate": 1.3235976348850165e-05,
"loss": 0.656,
"step": 1260
},
{
"epoch": 1.0143841774048548,
"grad_norm": 0.3681392967700958,
"learning_rate": 1.218276042118629e-05,
"loss": 0.6828,
"step": 1270
},
{
"epoch": 1.0223753870742183,
"grad_norm": 0.3528028726577759,
"learning_rate": 1.1170483440512614e-05,
"loss": 0.6531,
"step": 1280
},
{
"epoch": 1.030366596743582,
"grad_norm": 0.3827133774757385,
"learning_rate": 1.0199617313642063e-05,
"loss": 0.6469,
"step": 1290
},
{
"epoch": 1.0383578064129457,
"grad_norm": 0.4323211908340454,
"learning_rate": 9.270614642331376e-06,
"loss": 0.6718,
"step": 1300
},
{
"epoch": 1.0463490160823095,
"grad_norm": 0.4139029085636139,
"learning_rate": 8.383908512285555e-06,
"loss": 0.6629,
"step": 1310
},
{
"epoch": 1.0543402257516732,
"grad_norm": 0.4098852872848511,
"learning_rate": 7.5399122912605095e-06,
"loss": 0.7382,
"step": 1320
},
{
"epoch": 1.062331435421037,
"grad_norm": 0.3670465648174286,
"learning_rate": 6.739019436357774e-06,
"loss": 0.6502,
"step": 1330
},
{
"epoch": 1.0703226450904006,
"grad_norm": 0.456601619720459,
"learning_rate": 5.981603310601414e-06,
"loss": 0.6587,
"step": 1340
},
{
"epoch": 1.0783138547597642,
"grad_norm": 0.36275264620780945,
"learning_rate": 5.2680170088822425e-06,
"loss": 0.674,
"step": 1350
},
{
"epoch": 1.0863050644291279,
"grad_norm": 0.5391157865524292,
"learning_rate": 4.5985931933508754e-06,
"loss": 0.6754,
"step": 1360
},
{
"epoch": 1.0942962740984916,
"grad_norm": 0.3605053424835205,
"learning_rate": 3.973643938336113e-06,
"loss": 0.6801,
"step": 1370
},
{
"epoch": 1.1022874837678553,
"grad_norm": 0.4948176443576813,
"learning_rate": 3.393460584861008e-06,
"loss": 0.7102,
"step": 1380
},
{
"epoch": 1.110278693437219,
"grad_norm": 0.4405811131000519,
"learning_rate": 2.8583136048245697e-06,
"loss": 0.6844,
"step": 1390
},
{
"epoch": 1.1182699031065828,
"grad_norm": 0.42184901237487793,
"learning_rate": 2.368452474912153e-06,
"loss": 0.6668,
"step": 1400
},
{
"epoch": 1.1262611127759465,
"grad_norm": 0.5292870998382568,
"learning_rate": 1.9241055602935877e-06,
"loss": 0.6738,
"step": 1410
},
{
"epoch": 1.13425232244531,
"grad_norm": 0.393926203250885,
"learning_rate": 1.5254800081630826e-06,
"loss": 0.684,
"step": 1420
},
{
"epoch": 1.1422435321146738,
"grad_norm": 0.2781499922275543,
"learning_rate": 1.1727616511706508e-06,
"loss": 0.7076,
"step": 1430
},
{
"epoch": 1.1502347417840375,
"grad_norm": 0.43156924843788147,
"learning_rate": 8.661149207899844e-07,
"loss": 0.6329,
"step": 1440
},
{
"epoch": 1.1582259514534012,
"grad_norm": 0.40006959438323975,
"learning_rate": 6.056827706632185e-07,
"loss": 0.6547,
"step": 1450
},
{
"epoch": 1.166217161122765,
"grad_norm": 0.40933167934417725,
"learning_rate": 3.9158660995830545e-07,
"loss": 0.7007,
"step": 1460
},
{
"epoch": 1.1742083707921287,
"grad_norm": 0.4096595346927643,
"learning_rate": 2.2392624677004536e-07,
"loss": 0.6493,
"step": 1470
},
{
"epoch": 1.1821995804614924,
"grad_norm": 0.3870149850845337,
"learning_rate": 1.0277984159122733e-07,
"loss": 0.6752,
"step": 1480
},
{
"epoch": 1.1901907901308562,
"grad_norm": 0.41296494007110596,
"learning_rate": 2.820387087548726e-08,
"loss": 0.7173,
"step": 1490
},
{
"epoch": 1.1981819998002197,
"grad_norm": 0.38857489824295044,
"learning_rate": 2.331007089351189e-10,
"loss": 0.7149,
"step": 1500
}
],
"logging_steps": 10,
"max_steps": 1500,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.6210611577054822e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}