{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.1981819998002197,
"eval_steps": 500,
"global_step": 1500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0079912096693637,
"grad_norm": 3.2677111625671387,
"learning_rate": 4e-05,
"loss": 1.8334,
"step": 10
},
{
"epoch": 0.0159824193387274,
"grad_norm": 2.2300846576690674,
"learning_rate": 8.444444444444444e-05,
"loss": 1.1382,
"step": 20
},
{
"epoch": 0.0239736290080911,
"grad_norm": 0.9214928150177002,
"learning_rate": 0.00012888888888888892,
"loss": 0.9079,
"step": 30
},
{
"epoch": 0.0319648386774548,
"grad_norm": 0.7646894454956055,
"learning_rate": 0.00017333333333333334,
"loss": 0.8226,
"step": 40
},
{
"epoch": 0.0399560483468185,
"grad_norm": 0.5976511240005493,
"learning_rate": 0.00019999627041039135,
"loss": 0.7371,
"step": 50
},
{
"epoch": 0.0479472580161822,
"grad_norm": 0.5931047797203064,
"learning_rate": 0.00019995431572214454,
"loss": 0.8082,
"step": 60
},
{
"epoch": 0.055938467685545896,
"grad_norm": 0.6012359857559204,
"learning_rate": 0.00019986576398242566,
"loss": 0.7508,
"step": 70
},
{
"epoch": 0.0639296773549096,
"grad_norm": 0.5190461277961731,
"learning_rate": 0.00019973065647259348,
"loss": 0.7647,
"step": 80
},
{
"epoch": 0.0719208870242733,
"grad_norm": 0.5790061354637146,
"learning_rate": 0.00019954905617753814,
"loss": 0.7418,
"step": 90
},
{
"epoch": 0.079912096693637,
"grad_norm": 0.428479939699173,
"learning_rate": 0.00019932104775631846,
"loss": 0.6965,
"step": 100
},
{
"epoch": 0.0879033063630007,
"grad_norm": 0.539878249168396,
"learning_rate": 0.00019904673750269537,
"loss": 0.7899,
"step": 110
},
{
"epoch": 0.0958945160323644,
"grad_norm": 0.5310686230659485,
"learning_rate": 0.00019872625329557953,
"loss": 0.6959,
"step": 120
},
{
"epoch": 0.1038857257017281,
"grad_norm": 0.4262120723724365,
"learning_rate": 0.0001983597445394162,
"loss": 0.7349,
"step": 130
},
{
"epoch": 0.11187693537109179,
"grad_norm": 0.4776265621185303,
"learning_rate": 0.00019794738209453545,
"loss": 0.7591,
"step": 140
},
{
"epoch": 0.1198681450404555,
"grad_norm": 0.3852095305919647,
"learning_rate": 0.00019748935819749987,
"loss": 0.6843,
"step": 150
},
{
"epoch": 0.1278593547098192,
"grad_norm": 0.534788191318512,
"learning_rate": 0.00019698588637148703,
"loss": 0.7827,
"step": 160
},
{
"epoch": 0.1358505643791829,
"grad_norm": 0.35920798778533936,
"learning_rate": 0.00019643720132674856,
"loss": 0.7002,
"step": 170
},
{
"epoch": 0.1438417740485466,
"grad_norm": 0.403860479593277,
"learning_rate": 0.00019584355885119196,
"loss": 0.7003,
"step": 180
},
{
"epoch": 0.1518329837179103,
"grad_norm": 0.5393890738487244,
"learning_rate": 0.00019520523569113677,
"loss": 0.6816,
"step": 190
},
{
"epoch": 0.159824193387274,
"grad_norm": 0.3763524293899536,
"learning_rate": 0.0001945225294222997,
"loss": 0.6774,
"step": 200
},
{
"epoch": 0.1678154030566377,
"grad_norm": 0.36979958415031433,
"learning_rate": 0.00019379575831106994,
"loss": 0.6983,
"step": 210
},
{
"epoch": 0.1758066127260014,
"grad_norm": 0.384091854095459,
"learning_rate": 0.00019302526116613864,
"loss": 0.7057,
"step": 220
},
{
"epoch": 0.1837978223953651,
"grad_norm": 0.45156919956207275,
"learning_rate": 0.0001922113971805517,
"loss": 0.7439,
"step": 230
},
{
"epoch": 0.1917890320647288,
"grad_norm": 0.4209638833999634,
"learning_rate": 0.0001913545457642601,
"loss": 0.7141,
"step": 240
},
{
"epoch": 0.1997802417340925,
"grad_norm": 0.5019676685333252,
"learning_rate": 0.0001904551063672452,
"loss": 0.7205,
"step": 250
},
{
"epoch": 0.2077714514034562,
"grad_norm": 0.43800297379493713,
"learning_rate": 0.00018951349829330168,
"loss": 0.7181,
"step": 260
},
{
"epoch": 0.2157626610728199,
"grad_norm": 0.40507107973098755,
"learning_rate": 0.0001885301605045651,
"loss": 0.7303,
"step": 270
},
{
"epoch": 0.22375387074218359,
"grad_norm": 0.3452669084072113,
"learning_rate": 0.000187505551416875,
"loss": 0.647,
"step": 280
},
{
"epoch": 0.2317450804115473,
"grad_norm": 0.3924757242202759,
"learning_rate": 0.00018644014868606895,
"loss": 0.6721,
"step": 290
},
{
"epoch": 0.239736290080911,
"grad_norm": 0.5480809807777405,
"learning_rate": 0.0001853344489853074,
"loss": 0.7755,
"step": 300
},
{
"epoch": 0.2477274997502747,
"grad_norm": 0.456853985786438,
"learning_rate": 0.0001841889677735327,
"loss": 0.7203,
"step": 310
},
{
"epoch": 0.2557187094196384,
"grad_norm": 0.40140455961227417,
"learning_rate": 0.0001830042390551708,
"loss": 0.643,
"step": 320
},
{
"epoch": 0.2637099190890021,
"grad_norm": 0.47574156522750854,
"learning_rate": 0.00018178081513118706,
"loss": 0.7128,
"step": 330
},
{
"epoch": 0.2717011287583658,
"grad_norm": 0.43806251883506775,
"learning_rate": 0.00018051926634161282,
"loss": 0.6922,
"step": 340
},
{
"epoch": 0.2796923384277295,
"grad_norm": 0.4530179500579834,
"learning_rate": 0.0001792201807996622,
"loss": 0.7907,
"step": 350
},
{
"epoch": 0.2876835480970932,
"grad_norm": 0.40421706438064575,
"learning_rate": 0.00017788416411756338,
"loss": 0.7358,
"step": 360
},
{
"epoch": 0.2956747577664569,
"grad_norm": 0.41535094380378723,
"learning_rate": 0.00017651183912423228,
"loss": 0.7031,
"step": 370
},
{
"epoch": 0.3036659674358206,
"grad_norm": 0.3584170341491699,
"learning_rate": 0.00017510384557492,
"loss": 0.7208,
"step": 380
},
{
"epoch": 0.3116571771051843,
"grad_norm": 0.42786943912506104,
"learning_rate": 0.00017366083985296947,
"loss": 0.7615,
"step": 390
},
{
"epoch": 0.319648386774548,
"grad_norm": 0.4445035457611084,
"learning_rate": 0.00017218349466382023,
"loss": 0.7002,
"step": 400
},
{
"epoch": 0.3276395964439117,
"grad_norm": 0.5019694566726685,
"learning_rate": 0.0001706724987214045,
"loss": 0.7204,
"step": 410
},
{
"epoch": 0.3356308061132754,
"grad_norm": 0.3719067871570587,
"learning_rate": 0.00016912855642708,
"loss": 0.6981,
"step": 420
},
{
"epoch": 0.3436220157826391,
"grad_norm": 0.47156500816345215,
"learning_rate": 0.00016755238754124965,
"loss": 0.6733,
"step": 430
},
{
"epoch": 0.3516132254520028,
"grad_norm": 0.4605729579925537,
"learning_rate": 0.0001659447268478212,
"loss": 0.74,
"step": 440
},
{
"epoch": 0.3596044351213665,
"grad_norm": 0.4625272750854492,
"learning_rate": 0.00016430632381166305,
"loss": 0.7508,
"step": 450
},
{
"epoch": 0.3675956447907302,
"grad_norm": 0.4062426686286926,
"learning_rate": 0.0001626379422292162,
"loss": 0.7178,
"step": 460
},
{
"epoch": 0.3755868544600939,
"grad_norm": 0.42503005266189575,
"learning_rate": 0.00016094035987242484,
"loss": 0.6757,
"step": 470
},
{
"epoch": 0.3835780641294576,
"grad_norm": 0.4469659924507141,
"learning_rate": 0.00015921436812615204,
"loss": 0.723,
"step": 480
},
{
"epoch": 0.3915692737988213,
"grad_norm": 0.3277670443058014,
"learning_rate": 0.00015746077161924905,
"loss": 0.7035,
"step": 490
},
{
"epoch": 0.399560483468185,
"grad_norm": 0.4804005026817322,
"learning_rate": 0.00015568038784945077,
"loss": 0.7347,
"step": 500
},
{
"epoch": 0.4075516931375487,
"grad_norm": 0.47554656863212585,
"learning_rate": 0.00015387404680227175,
"loss": 0.7332,
"step": 510
},
{
"epoch": 0.4155429028069124,
"grad_norm": 0.47048240900039673,
"learning_rate": 0.00015204259056408046,
"loss": 0.7516,
"step": 520
},
{
"epoch": 0.4235341124762761,
"grad_norm": 0.4335585832595825,
"learning_rate": 0.00015018687292953293,
"loss": 0.6726,
"step": 530
},
{
"epoch": 0.4315253221456398,
"grad_norm": 0.30512747168540955,
"learning_rate": 0.00014830775900354735,
"loss": 0.6954,
"step": 540
},
{
"epoch": 0.4395165318150035,
"grad_norm": 0.3644169867038727,
"learning_rate": 0.00014640612479800686,
"loss": 0.6699,
"step": 550
},
{
"epoch": 0.44750774148436717,
"grad_norm": 0.7686610221862793,
"learning_rate": 0.00014448285682337682,
"loss": 0.6825,
"step": 560
},
{
"epoch": 0.4554989511537309,
"grad_norm": 0.42735007405281067,
"learning_rate": 0.00014253885167542866,
"loss": 0.7192,
"step": 570
},
{
"epoch": 0.4634901608230946,
"grad_norm": 0.3812963664531708,
"learning_rate": 0.00014057501561726157,
"loss": 0.708,
"step": 580
},
{
"epoch": 0.4714813704924583,
"grad_norm": 0.3944273591041565,
"learning_rate": 0.0001385922641568175,
"loss": 0.7389,
"step": 590
},
{
"epoch": 0.479472580161822,
"grad_norm": 0.44416099786758423,
"learning_rate": 0.00013659152162008676,
"loss": 0.7025,
"step": 600
},
{
"epoch": 0.4874637898311857,
"grad_norm": 0.43281784653663635,
"learning_rate": 0.0001345737207202023,
"loss": 0.7012,
"step": 610
},
{
"epoch": 0.4954549995005494,
"grad_norm": 0.44126081466674805,
"learning_rate": 0.0001325398021226242,
"loss": 0.6811,
"step": 620
},
{
"epoch": 0.5034462091699131,
"grad_norm": 0.39465758204460144,
"learning_rate": 0.00013049071400661716,
"loss": 0.7229,
"step": 630
},
{
"epoch": 0.5114374188392768,
"grad_norm": 0.4265965223312378,
"learning_rate": 0.00012842741162322487,
"loss": 0.66,
"step": 640
},
{
"epoch": 0.5194286285086405,
"grad_norm": 0.3862599730491638,
"learning_rate": 0.00012635085684994767,
"loss": 0.7013,
"step": 650
},
{
"epoch": 0.5274198381780042,
"grad_norm": 0.5364603400230408,
"learning_rate": 0.00012426201774233135,
"loss": 0.7172,
"step": 660
},
{
"epoch": 0.5354110478473679,
"grad_norm": 0.5026273727416992,
"learning_rate": 0.00012216186808267546,
"loss": 0.7058,
"step": 670
},
{
"epoch": 0.5434022575167315,
"grad_norm": 0.39891964197158813,
"learning_rate": 0.0001200513869260721,
"loss": 0.7015,
"step": 680
},
{
"epoch": 0.5513934671860953,
"grad_norm": 0.45894622802734375,
"learning_rate": 0.00011793155814398674,
"loss": 0.753,
"step": 690
},
{
"epoch": 0.559384676855459,
"grad_norm": 0.34293729066848755,
"learning_rate": 0.00011580336996559343,
"loss": 0.6815,
"step": 700
},
{
"epoch": 0.5673758865248227,
"grad_norm": 0.4446139931678772,
"learning_rate": 0.00011366781451707879,
"loss": 0.6742,
"step": 710
},
{
"epoch": 0.5753670961941864,
"grad_norm": 0.48648640513420105,
"learning_rate": 0.0001115258873591291,
"loss": 0.6994,
"step": 720
},
{
"epoch": 0.5833583058635501,
"grad_norm": 0.41053032875061035,
"learning_rate": 0.00010937858702281631,
"loss": 0.7079,
"step": 730
},
{
"epoch": 0.5913495155329138,
"grad_norm": 0.4511827230453491,
"learning_rate": 0.00010722691454409943,
"loss": 0.708,
"step": 740
},
{
"epoch": 0.5993407252022775,
"grad_norm": 0.37551945447921753,
"learning_rate": 0.00010507187299715815,
"loss": 0.7,
"step": 750
},
{
"epoch": 0.6073319348716412,
"grad_norm": 0.38525089621543884,
"learning_rate": 0.00010291446702677599,
"loss": 0.6866,
"step": 760
},
{
"epoch": 0.6153231445410049,
"grad_norm": 0.3816082179546356,
"learning_rate": 0.0001007557023799917,
"loss": 0.7071,
"step": 770
},
{
"epoch": 0.6233143542103686,
"grad_norm": 0.48344865441322327,
"learning_rate": 9.859658543723659e-05,
"loss": 0.7181,
"step": 780
},
{
"epoch": 0.6313055638797322,
"grad_norm": 0.4207400977611542,
"learning_rate": 9.643812274317644e-05,
"loss": 0.7565,
"step": 790
},
{
"epoch": 0.639296773549096,
"grad_norm": 0.5104153752326965,
"learning_rate": 9.428132053747712e-05,
"loss": 0.7211,
"step": 800
},
{
"epoch": 0.6472879832184597,
"grad_norm": 0.40380504727363586,
"learning_rate": 9.212718428571231e-05,
"loss": 0.6808,
"step": 810
},
{
"epoch": 0.6552791928878234,
"grad_norm": 0.53224778175354,
"learning_rate": 8.997671821063191e-05,
"loss": 0.6786,
"step": 820
},
{
"epoch": 0.663270402557187,
"grad_norm": 0.42555850744247437,
"learning_rate": 8.783092482401005e-05,
"loss": 0.7767,
"step": 830
},
{
"epoch": 0.6712616122265508,
"grad_norm": 0.4147053360939026,
"learning_rate": 8.569080445929073e-05,
"loss": 0.6728,
"step": 840
},
{
"epoch": 0.6792528218959145,
"grad_norm": 0.3875350058078766,
"learning_rate": 8.355735480524874e-05,
"loss": 0.651,
"step": 850
},
{
"epoch": 0.6872440315652782,
"grad_norm": 0.36827635765075684,
"learning_rate": 8.143157044088377e-05,
"loss": 0.6989,
"step": 860
},
{
"epoch": 0.6952352412346419,
"grad_norm": 0.39672666788101196,
"learning_rate": 7.931444237176398e-05,
"loss": 0.6997,
"step": 870
},
{
"epoch": 0.7032264509040056,
"grad_norm": 0.4494044780731201,
"learning_rate": 7.72069575680357e-05,
"loss": 0.6977,
"step": 880
},
{
"epoch": 0.7112176605733693,
"grad_norm": 0.4261849522590637,
"learning_rate": 7.5110098504314e-05,
"loss": 0.7528,
"step": 890
},
{
"epoch": 0.719208870242733,
"grad_norm": 0.3963007926940918,
"learning_rate": 7.30248427016697e-05,
"loss": 0.7152,
"step": 900
},
{
"epoch": 0.7272000799120967,
"grad_norm": 0.346351683139801,
"learning_rate": 7.095216227192467e-05,
"loss": 0.6679,
"step": 910
},
{
"epoch": 0.7351912895814604,
"grad_norm": 0.32863008975982666,
"learning_rate": 6.889302346446969e-05,
"loss": 0.6647,
"step": 920
},
{
"epoch": 0.7431824992508241,
"grad_norm": 0.3735399544239044,
"learning_rate": 6.684838621581478e-05,
"loss": 0.6917,
"step": 930
},
{
"epoch": 0.7511737089201878,
"grad_norm": 0.5078648924827576,
"learning_rate": 6.481920370208274e-05,
"loss": 0.7392,
"step": 940
},
{
"epoch": 0.7591649185895515,
"grad_norm": 0.4455859065055847,
"learning_rate": 6.28064218946542e-05,
"loss": 0.6857,
"step": 950
},
{
"epoch": 0.7671561282589152,
"grad_norm": 0.41593629121780396,
"learning_rate": 6.0810979119171254e-05,
"loss": 0.676,
"step": 960
},
{
"epoch": 0.775147337928279,
"grad_norm": 0.3919152319431305,
"learning_rate": 5.883380561810563e-05,
"loss": 0.707,
"step": 970
},
{
"epoch": 0.7831385475976426,
"grad_norm": 0.34168556332588196,
"learning_rate": 5.6875823117095025e-05,
"loss": 0.6813,
"step": 980
},
{
"epoch": 0.7911297572670063,
"grad_norm": 0.3636936545372009,
"learning_rate": 5.493794439524979e-05,
"loss": 0.6822,
"step": 990
},
{
"epoch": 0.79912096693637,
"grad_norm": 0.38939976692199707,
"learning_rate": 5.302107285963045e-05,
"loss": 0.7016,
"step": 1000
},
{
"epoch": 0.8071121766057336,
"grad_norm": 0.4251338243484497,
"learning_rate": 5.1126102124094064e-05,
"loss": 0.662,
"step": 1010
},
{
"epoch": 0.8151033862750974,
"grad_norm": 0.4065021276473999,
"learning_rate": 4.9253915592706515e-05,
"loss": 0.6864,
"step": 1020
},
{
"epoch": 0.8230945959444611,
"grad_norm": 0.38323187828063965,
"learning_rate": 4.74053860479137e-05,
"loss": 0.6989,
"step": 1030
},
{
"epoch": 0.8310858056138248,
"grad_norm": 0.36701148748397827,
"learning_rate": 4.558137524366533e-05,
"loss": 0.7326,
"step": 1040
},
{
"epoch": 0.8390770152831885,
"grad_norm": 0.3849141299724579,
"learning_rate": 4.3782733503678886e-05,
"loss": 0.7265,
"step": 1050
},
{
"epoch": 0.8470682249525522,
"grad_norm": 0.42157188057899475,
"learning_rate": 4.2010299325033034e-05,
"loss": 0.6975,
"step": 1060
},
{
"epoch": 0.8550594346219159,
"grad_norm": 0.42658373713493347,
"learning_rate": 4.026489898727419e-05,
"loss": 0.6891,
"step": 1070
},
{
"epoch": 0.8630506442912796,
"grad_norm": 0.605895459651947,
"learning_rate": 3.854734616721852e-05,
"loss": 0.7375,
"step": 1080
},
{
"epoch": 0.8710418539606433,
"grad_norm": 0.40604451298713684,
"learning_rate": 3.6858441559629306e-05,
"loss": 0.7395,
"step": 1090
},
{
"epoch": 0.879033063630007,
"grad_norm": 0.3668944537639618,
"learning_rate": 3.519897250394612e-05,
"loss": 0.6727,
"step": 1100
},
{
"epoch": 0.8870242732993707,
"grad_norm": 0.42747315764427185,
"learning_rate": 3.3569712617240435e-05,
"loss": 0.6856,
"step": 1110
},
{
"epoch": 0.8950154829687343,
"grad_norm": 0.4526374936103821,
"learning_rate": 3.197142143356787e-05,
"loss": 0.6866,
"step": 1120
},
{
"epoch": 0.9030066926380981,
"grad_norm": 0.3767329454421997,
"learning_rate": 3.040484404988614e-05,
"loss": 0.667,
"step": 1130
},
{
"epoch": 0.9109979023074618,
"grad_norm": 0.4605715572834015,
"learning_rate": 2.8870710778703103e-05,
"loss": 0.7107,
"step": 1140
},
{
"epoch": 0.9189891119768255,
"grad_norm": 0.5346247553825378,
"learning_rate": 2.736973680761702e-05,
"loss": 0.7104,
"step": 1150
},
{
"epoch": 0.9269803216461892,
"grad_norm": 0.4917076528072357,
"learning_rate": 2.590262186590805e-05,
"loss": 0.7009,
"step": 1160
},
{
"epoch": 0.9349715313155529,
"grad_norm": 0.4159565269947052,
"learning_rate": 2.447004989833599e-05,
"loss": 0.6347,
"step": 1170
},
{
"epoch": 0.9429627409849166,
"grad_norm": 0.3852473795413971,
"learning_rate": 2.307268874629649e-05,
"loss": 0.7313,
"step": 1180
},
{
"epoch": 0.9509539506542803,
"grad_norm": 0.4686223268508911,
"learning_rate": 2.1711189836484314e-05,
"loss": 0.6376,
"step": 1190
},
{
"epoch": 0.958945160323644,
"grad_norm": 0.4293384552001953,
"learning_rate": 2.038618787720925e-05,
"loss": 0.686,
"step": 1200
},
{
"epoch": 0.9669363699930077,
"grad_norm": 0.49944257736206055,
"learning_rate": 1.9098300562505266e-05,
"loss": 0.7029,
"step": 1210
},
{
"epoch": 0.9749275796623714,
"grad_norm": 0.3824506998062134,
"learning_rate": 1.784812828417197e-05,
"loss": 0.7253,
"step": 1220
},
{
"epoch": 0.982918789331735,
"grad_norm": 0.3724282383918762,
"learning_rate": 1.663625385188182e-05,
"loss": 0.7033,
"step": 1230
},
{
"epoch": 0.9909099990010988,
"grad_norm": 0.47136667370796204,
"learning_rate": 1.5463242221483743e-05,
"loss": 0.66,
"step": 1240
},
{
"epoch": 0.9989012086704625,
"grad_norm": 0.42494097352027893,
"learning_rate": 1.432964023163028e-05,
"loss": 0.6818,
"step": 1250
},
{
"epoch": 1.006392967735491,
"grad_norm": 0.48029494285583496,
"learning_rate": 1.3235976348850165e-05,
"loss": 0.656,
"step": 1260
},
{
"epoch": 1.0143841774048548,
"grad_norm": 0.3681392967700958,
"learning_rate": 1.218276042118629e-05,
"loss": 0.6828,
"step": 1270
},
{
"epoch": 1.0223753870742183,
"grad_norm": 0.3528028726577759,
"learning_rate": 1.1170483440512614e-05,
"loss": 0.6531,
"step": 1280
},
{
"epoch": 1.030366596743582,
"grad_norm": 0.3827133774757385,
"learning_rate": 1.0199617313642063e-05,
"loss": 0.6469,
"step": 1290
},
{
"epoch": 1.0383578064129457,
"grad_norm": 0.4323211908340454,
"learning_rate": 9.270614642331376e-06,
"loss": 0.6718,
"step": 1300
},
{
"epoch": 1.0463490160823095,
"grad_norm": 0.4139029085636139,
"learning_rate": 8.383908512285555e-06,
"loss": 0.6629,
"step": 1310
},
{
"epoch": 1.0543402257516732,
"grad_norm": 0.4098852872848511,
"learning_rate": 7.5399122912605095e-06,
"loss": 0.7382,
"step": 1320
},
{
"epoch": 1.062331435421037,
"grad_norm": 0.3670465648174286,
"learning_rate": 6.739019436357774e-06,
"loss": 0.6502,
"step": 1330
},
{
"epoch": 1.0703226450904006,
"grad_norm": 0.456601619720459,
"learning_rate": 5.981603310601414e-06,
"loss": 0.6587,
"step": 1340
},
{
"epoch": 1.0783138547597642,
"grad_norm": 0.36275264620780945,
"learning_rate": 5.2680170088822425e-06,
"loss": 0.674,
"step": 1350
},
{
"epoch": 1.0863050644291279,
"grad_norm": 0.5391157865524292,
"learning_rate": 4.5985931933508754e-06,
"loss": 0.6754,
"step": 1360
},
{
"epoch": 1.0942962740984916,
"grad_norm": 0.3605053424835205,
"learning_rate": 3.973643938336113e-06,
"loss": 0.6801,
"step": 1370
},
{
"epoch": 1.1022874837678553,
"grad_norm": 0.4948176443576813,
"learning_rate": 3.393460584861008e-06,
"loss": 0.7102,
"step": 1380
},
{
"epoch": 1.110278693437219,
"grad_norm": 0.4405811131000519,
"learning_rate": 2.8583136048245697e-06,
"loss": 0.6844,
"step": 1390
},
{
"epoch": 1.1182699031065828,
"grad_norm": 0.42184901237487793,
"learning_rate": 2.368452474912153e-06,
"loss": 0.6668,
"step": 1400
},
{
"epoch": 1.1262611127759465,
"grad_norm": 0.5292870998382568,
"learning_rate": 1.9241055602935877e-06,
"loss": 0.6738,
"step": 1410
},
{
"epoch": 1.13425232244531,
"grad_norm": 0.393926203250885,
"learning_rate": 1.5254800081630826e-06,
"loss": 0.684,
"step": 1420
},
{
"epoch": 1.1422435321146738,
"grad_norm": 0.2781499922275543,
"learning_rate": 1.1727616511706508e-06,
"loss": 0.7076,
"step": 1430
},
{
"epoch": 1.1502347417840375,
"grad_norm": 0.43156924843788147,
"learning_rate": 8.661149207899844e-07,
"loss": 0.6329,
"step": 1440
},
{
"epoch": 1.1582259514534012,
"grad_norm": 0.40006959438323975,
"learning_rate": 6.056827706632185e-07,
"loss": 0.6547,
"step": 1450
},
{
"epoch": 1.166217161122765,
"grad_norm": 0.40933167934417725,
"learning_rate": 3.9158660995830545e-07,
"loss": 0.7007,
"step": 1460
},
{
"epoch": 1.1742083707921287,
"grad_norm": 0.4096595346927643,
"learning_rate": 2.2392624677004536e-07,
"loss": 0.6493,
"step": 1470
},
{
"epoch": 1.1821995804614924,
"grad_norm": 0.3870149850845337,
"learning_rate": 1.0277984159122733e-07,
"loss": 0.6752,
"step": 1480
},
{
"epoch": 1.1901907901308562,
"grad_norm": 0.41296494007110596,
"learning_rate": 2.820387087548726e-08,
"loss": 0.7173,
"step": 1490
},
{
"epoch": 1.1981819998002197,
"grad_norm": 0.38857489824295044,
"learning_rate": 2.331007089351189e-10,
"loss": 0.7149,
"step": 1500
}
],
"logging_steps": 10,
"max_steps": 1500,
"num_input_tokens_seen": 0,
"num_train_epochs": 2,
"save_steps": 200,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.6210611577054822e+17,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}