{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.9997473684210526,
"eval_steps": 500,
"global_step": 1484,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.0006736842105263158,
"grad_norm": 3.6967623233795166,
"learning_rate": 1.3422818791946309e-06,
"loss": 2.4093,
"step": 1
},
{
"epoch": 0.006736842105263158,
"grad_norm": 2.5490193367004395,
"learning_rate": 1.3422818791946309e-05,
"loss": 2.4939,
"step": 10
},
{
"epoch": 0.013473684210526317,
"grad_norm": 0.18483224511146545,
"learning_rate": 2.6845637583892618e-05,
"loss": 1.1877,
"step": 20
},
{
"epoch": 0.020210526315789474,
"grad_norm": 0.2031693309545517,
"learning_rate": 4.026845637583892e-05,
"loss": 0.8909,
"step": 30
},
{
"epoch": 0.026947368421052633,
"grad_norm": 0.6876732707023621,
"learning_rate": 5.3691275167785237e-05,
"loss": 0.7581,
"step": 40
},
{
"epoch": 0.03368421052631579,
"grad_norm": 0.09247241914272308,
"learning_rate": 6.711409395973155e-05,
"loss": 0.7594,
"step": 50
},
{
"epoch": 0.04042105263157895,
"grad_norm": 0.1324968934059143,
"learning_rate": 8.053691275167784e-05,
"loss": 0.7405,
"step": 60
},
{
"epoch": 0.04715789473684211,
"grad_norm": 0.05673883110284805,
"learning_rate": 9.395973154362417e-05,
"loss": 0.7065,
"step": 70
},
{
"epoch": 0.053894736842105266,
"grad_norm": 0.04617280140519142,
"learning_rate": 0.00010738255033557047,
"loss": 0.6817,
"step": 80
},
{
"epoch": 0.06063157894736842,
"grad_norm": 0.04381496459245682,
"learning_rate": 0.0001208053691275168,
"loss": 0.6789,
"step": 90
},
{
"epoch": 0.06736842105263158,
"grad_norm": 0.07428538799285889,
"learning_rate": 0.0001342281879194631,
"loss": 0.6816,
"step": 100
},
{
"epoch": 0.07410526315789474,
"grad_norm": 0.04249708354473114,
"learning_rate": 0.00014765100671140942,
"loss": 0.6997,
"step": 110
},
{
"epoch": 0.0808421052631579,
"grad_norm": 0.05957937240600586,
"learning_rate": 0.0001610738255033557,
"loss": 0.6807,
"step": 120
},
{
"epoch": 0.08757894736842105,
"grad_norm": 0.03975442424416542,
"learning_rate": 0.000174496644295302,
"loss": 0.6733,
"step": 130
},
{
"epoch": 0.09431578947368421,
"grad_norm": 0.04079463332891464,
"learning_rate": 0.00018791946308724833,
"loss": 0.6556,
"step": 140
},
{
"epoch": 0.10105263157894737,
"grad_norm": 0.04245497286319733,
"learning_rate": 0.00019985018726591762,
"loss": 0.6575,
"step": 150
},
{
"epoch": 0.10778947368421053,
"grad_norm": 0.09695123136043549,
"learning_rate": 0.00019835205992509364,
"loss": 0.6916,
"step": 160
},
{
"epoch": 0.11452631578947368,
"grad_norm": 0.03505201265215874,
"learning_rate": 0.00019685393258426966,
"loss": 0.6622,
"step": 170
},
{
"epoch": 0.12126315789473684,
"grad_norm": 0.02820334956049919,
"learning_rate": 0.0001953558052434457,
"loss": 0.6497,
"step": 180
},
{
"epoch": 0.128,
"grad_norm": 0.04135354235768318,
"learning_rate": 0.00019385767790262173,
"loss": 0.6671,
"step": 190
},
{
"epoch": 0.13473684210526315,
"grad_norm": 0.031461067497730255,
"learning_rate": 0.00019235955056179775,
"loss": 0.657,
"step": 200
},
{
"epoch": 0.1414736842105263,
"grad_norm": 0.04208710789680481,
"learning_rate": 0.0001908614232209738,
"loss": 0.6766,
"step": 210
},
{
"epoch": 0.1482105263157895,
"grad_norm": 3.495147705078125,
"learning_rate": 0.00018936329588014982,
"loss": 3.9378,
"step": 220
},
{
"epoch": 0.15494736842105264,
"grad_norm": 0.18893112242221832,
"learning_rate": 0.00018786516853932586,
"loss": 7.1374,
"step": 230
},
{
"epoch": 0.1616842105263158,
"grad_norm": 0.0959916040301323,
"learning_rate": 0.00018636704119850189,
"loss": 5.8104,
"step": 240
},
{
"epoch": 0.16842105263157894,
"grad_norm": 0.08286964148283005,
"learning_rate": 0.0001848689138576779,
"loss": 4.7292,
"step": 250
},
{
"epoch": 0.1751578947368421,
"grad_norm": 0.04510454833507538,
"learning_rate": 0.00018337078651685393,
"loss": 4.9858,
"step": 260
},
{
"epoch": 0.18189473684210528,
"grad_norm": 0.2256896197795868,
"learning_rate": 0.00018187265917602997,
"loss": 4.7463,
"step": 270
},
{
"epoch": 0.18863157894736843,
"grad_norm": 0.06342379748821259,
"learning_rate": 0.00018037453183520602,
"loss": 4.517,
"step": 280
},
{
"epoch": 0.19536842105263158,
"grad_norm": 0.07497289776802063,
"learning_rate": 0.00017887640449438204,
"loss": 4.4052,
"step": 290
},
{
"epoch": 0.20210526315789473,
"grad_norm": 0.08952877670526505,
"learning_rate": 0.00017737827715355806,
"loss": 3.9614,
"step": 300
},
{
"epoch": 0.20884210526315788,
"grad_norm": 0.044066932052373886,
"learning_rate": 0.00017588014981273408,
"loss": 4.5861,
"step": 310
},
{
"epoch": 0.21557894736842106,
"grad_norm": 0.08251778781414032,
"learning_rate": 0.0001743820224719101,
"loss": 4.5163,
"step": 320
},
{
"epoch": 0.22231578947368422,
"grad_norm": 0.04723803699016571,
"learning_rate": 0.00017288389513108615,
"loss": 4.1904,
"step": 330
},
{
"epoch": 0.22905263157894737,
"grad_norm": 0.09082615375518799,
"learning_rate": 0.0001713857677902622,
"loss": 4.1982,
"step": 340
},
{
"epoch": 0.23578947368421052,
"grad_norm": 0.04866361245512962,
"learning_rate": 0.00016988764044943822,
"loss": 3.8506,
"step": 350
},
{
"epoch": 0.24252631578947367,
"grad_norm": 0.04515402019023895,
"learning_rate": 0.00016838951310861424,
"loss": 4.4254,
"step": 360
},
{
"epoch": 0.24926315789473685,
"grad_norm": 0.14205284416675568,
"learning_rate": 0.00016689138576779026,
"loss": 4.4111,
"step": 370
},
{
"epoch": 0.256,
"grad_norm": 0.16082021594047546,
"learning_rate": 0.0001653932584269663,
"loss": 4.1119,
"step": 380
},
{
"epoch": 0.26273684210526316,
"grad_norm": 0.061411116272211075,
"learning_rate": 0.00016389513108614235,
"loss": 4.059,
"step": 390
},
{
"epoch": 0.2694736842105263,
"grad_norm": 0.058379318565130234,
"learning_rate": 0.00016239700374531837,
"loss": 3.7307,
"step": 400
},
{
"epoch": 0.27621052631578946,
"grad_norm": 0.048859789967536926,
"learning_rate": 0.0001608988764044944,
"loss": 4.3039,
"step": 410
},
{
"epoch": 0.2829473684210526,
"grad_norm": 0.06003361940383911,
"learning_rate": 0.0001594007490636704,
"loss": 4.2032,
"step": 420
},
{
"epoch": 0.28968421052631577,
"grad_norm": 0.10120591521263123,
"learning_rate": 0.00015790262172284646,
"loss": 3.9567,
"step": 430
},
{
"epoch": 0.296421052631579,
"grad_norm": 0.21033401787281036,
"learning_rate": 0.00015640449438202248,
"loss": 3.9369,
"step": 440
},
{
"epoch": 0.3031578947368421,
"grad_norm": 0.06378967314958572,
"learning_rate": 0.00015490636704119852,
"loss": 3.6318,
"step": 450
},
{
"epoch": 0.3098947368421053,
"grad_norm": 0.042198359966278076,
"learning_rate": 0.00015340823970037455,
"loss": 4.1789,
"step": 460
},
{
"epoch": 0.31663157894736843,
"grad_norm": 0.053648848086595535,
"learning_rate": 0.00015191011235955057,
"loss": 4.1562,
"step": 470
},
{
"epoch": 0.3233684210526316,
"grad_norm": 0.0808805301785469,
"learning_rate": 0.00015041198501872659,
"loss": 3.8883,
"step": 480
},
{
"epoch": 0.33010526315789473,
"grad_norm": 0.13895294070243835,
"learning_rate": 0.00014891385767790263,
"loss": 3.9055,
"step": 490
},
{
"epoch": 0.3368421052631579,
"grad_norm": 0.11999215185642242,
"learning_rate": 0.00014741573033707865,
"loss": 3.6025,
"step": 500
},
{
"epoch": 0.34357894736842104,
"grad_norm": 0.0969998687505722,
"learning_rate": 0.0001459176029962547,
"loss": 4.2401,
"step": 510
},
{
"epoch": 0.3503157894736842,
"grad_norm": 0.2578948438167572,
"learning_rate": 0.00014441947565543072,
"loss": 4.1355,
"step": 520
},
{
"epoch": 0.35705263157894734,
"grad_norm": 0.067634217441082,
"learning_rate": 0.00014292134831460674,
"loss": 3.8735,
"step": 530
},
{
"epoch": 0.36378947368421055,
"grad_norm": 0.1961352676153183,
"learning_rate": 0.0001414232209737828,
"loss": 3.7641,
"step": 540
},
{
"epoch": 0.3705263157894737,
"grad_norm": 0.07940343767404556,
"learning_rate": 0.0001399250936329588,
"loss": 3.5177,
"step": 550
},
{
"epoch": 0.37726315789473686,
"grad_norm": 1.3029491901397705,
"learning_rate": 0.00013842696629213483,
"loss": 4.1854,
"step": 560
},
{
"epoch": 0.384,
"grad_norm": 0.10544762760400772,
"learning_rate": 0.00013692883895131088,
"loss": 4.3064,
"step": 570
},
{
"epoch": 0.39073684210526316,
"grad_norm": 0.150394469499588,
"learning_rate": 0.0001354307116104869,
"loss": 3.9517,
"step": 580
},
{
"epoch": 0.3974736842105263,
"grad_norm": 0.06921563297510147,
"learning_rate": 0.00013393258426966294,
"loss": 3.8917,
"step": 590
},
{
"epoch": 0.40421052631578946,
"grad_norm": 0.06402010470628738,
"learning_rate": 0.00013243445692883896,
"loss": 3.5635,
"step": 600
},
{
"epoch": 0.4109473684210526,
"grad_norm": 0.08918313682079315,
"learning_rate": 0.00013093632958801498,
"loss": 4.1197,
"step": 610
},
{
"epoch": 0.41768421052631577,
"grad_norm": 0.054397523403167725,
"learning_rate": 0.000129438202247191,
"loss": 4.0442,
"step": 620
},
{
"epoch": 0.4244210526315789,
"grad_norm": 0.068702831864357,
"learning_rate": 0.00012794007490636705,
"loss": 3.7506,
"step": 630
},
{
"epoch": 0.43115789473684213,
"grad_norm": 0.14575353264808655,
"learning_rate": 0.0001264419475655431,
"loss": 3.7359,
"step": 640
},
{
"epoch": 0.4378947368421053,
"grad_norm": 0.1481335461139679,
"learning_rate": 0.00012494382022471912,
"loss": 3.3705,
"step": 650
},
{
"epoch": 0.44463157894736843,
"grad_norm": 0.06438197940587997,
"learning_rate": 0.00012344569288389514,
"loss": 4.0248,
"step": 660
},
{
"epoch": 0.4513684210526316,
"grad_norm": 0.38855019211769104,
"learning_rate": 0.00012194756554307116,
"loss": 4.0265,
"step": 670
},
{
"epoch": 0.45810526315789474,
"grad_norm": 0.20793034136295319,
"learning_rate": 0.00012044943820224719,
"loss": 3.7305,
"step": 680
},
{
"epoch": 0.4648421052631579,
"grad_norm": 0.11011853814125061,
"learning_rate": 0.00011895131086142324,
"loss": 3.6933,
"step": 690
},
{
"epoch": 0.47157894736842104,
"grad_norm": 0.06795340031385422,
"learning_rate": 0.00011745318352059926,
"loss": 3.3734,
"step": 700
},
{
"epoch": 0.4783157894736842,
"grad_norm": 0.07788679003715515,
"learning_rate": 0.00011595505617977529,
"loss": 3.9053,
"step": 710
},
{
"epoch": 0.48505263157894735,
"grad_norm": 0.07339611649513245,
"learning_rate": 0.00011445692883895131,
"loss": 3.8685,
"step": 720
},
{
"epoch": 0.4917894736842105,
"grad_norm": 0.16048288345336914,
"learning_rate": 0.00011295880149812735,
"loss": 3.5673,
"step": 730
},
{
"epoch": 0.4985263157894737,
"grad_norm": 0.2596355974674225,
"learning_rate": 0.00011146067415730337,
"loss": 3.5684,
"step": 740
},
{
"epoch": 0.5052631578947369,
"grad_norm": 0.10115884989500046,
"learning_rate": 0.00010996254681647941,
"loss": 3.2226,
"step": 750
},
{
"epoch": 0.512,
"grad_norm": 0.13997367024421692,
"learning_rate": 0.00010846441947565545,
"loss": 3.8579,
"step": 760
},
{
"epoch": 0.5187368421052632,
"grad_norm": 0.08359155058860779,
"learning_rate": 0.00010696629213483147,
"loss": 3.8313,
"step": 770
},
{
"epoch": 0.5254736842105263,
"grad_norm": 0.2407791018486023,
"learning_rate": 0.0001054681647940075,
"loss": 3.5257,
"step": 780
},
{
"epoch": 0.5322105263157895,
"grad_norm": 0.34615418314933777,
"learning_rate": 0.00010397003745318352,
"loss": 3.5113,
"step": 790
},
{
"epoch": 0.5389473684210526,
"grad_norm": 0.06987264007329941,
"learning_rate": 0.00010247191011235954,
"loss": 3.1525,
"step": 800
},
{
"epoch": 0.5456842105263158,
"grad_norm": 0.07933894544839859,
"learning_rate": 0.00010097378277153558,
"loss": 3.718,
"step": 810
},
{
"epoch": 0.5524210526315789,
"grad_norm": 0.12424171715974808,
"learning_rate": 9.947565543071161e-05,
"loss": 3.6641,
"step": 820
},
{
"epoch": 0.5591578947368421,
"grad_norm": 0.2515564262866974,
"learning_rate": 9.797752808988764e-05,
"loss": 3.4268,
"step": 830
},
{
"epoch": 0.5658947368421052,
"grad_norm": 0.30851560831069946,
"learning_rate": 9.647940074906368e-05,
"loss": 3.3856,
"step": 840
},
{
"epoch": 0.5726315789473684,
"grad_norm": 0.05149822682142258,
"learning_rate": 9.49812734082397e-05,
"loss": 3.1259,
"step": 850
},
{
"epoch": 0.5793684210526315,
"grad_norm": 0.17960771918296814,
"learning_rate": 9.348314606741574e-05,
"loss": 3.6767,
"step": 860
},
{
"epoch": 0.5861052631578947,
"grad_norm": 0.17523854970932007,
"learning_rate": 9.198501872659176e-05,
"loss": 3.5995,
"step": 870
},
{
"epoch": 0.592842105263158,
"grad_norm": 0.3186163008213043,
"learning_rate": 9.04868913857678e-05,
"loss": 3.3966,
"step": 880
},
{
"epoch": 0.5995789473684211,
"grad_norm": 0.21263690292835236,
"learning_rate": 8.898876404494383e-05,
"loss": 3.3526,
"step": 890
},
{
"epoch": 0.6063157894736843,
"grad_norm": 0.10399254411458969,
"learning_rate": 8.749063670411985e-05,
"loss": 3.0519,
"step": 900
},
{
"epoch": 0.6130526315789474,
"grad_norm": 0.13143524527549744,
"learning_rate": 8.599250936329589e-05,
"loss": 3.629,
"step": 910
},
{
"epoch": 0.6197894736842106,
"grad_norm": 0.15374666452407837,
"learning_rate": 8.449438202247192e-05,
"loss": 3.6895,
"step": 920
},
{
"epoch": 0.6265263157894737,
"grad_norm": 0.23757484555244446,
"learning_rate": 8.299625468164794e-05,
"loss": 3.3622,
"step": 930
},
{
"epoch": 0.6332631578947369,
"grad_norm": 0.1661984622478485,
"learning_rate": 8.149812734082397e-05,
"loss": 3.3248,
"step": 940
},
{
"epoch": 0.64,
"grad_norm": 0.08603614568710327,
"learning_rate": 8e-05,
"loss": 3.0086,
"step": 950
},
{
"epoch": 0.6467368421052632,
"grad_norm": 0.07694745808839798,
"learning_rate": 7.850187265917604e-05,
"loss": 3.5162,
"step": 960
},
{
"epoch": 0.6534736842105263,
"grad_norm": 0.16395558416843414,
"learning_rate": 7.700374531835206e-05,
"loss": 3.4812,
"step": 970
},
{
"epoch": 0.6602105263157895,
"grad_norm": 0.13817398250102997,
"learning_rate": 7.55056179775281e-05,
"loss": 3.2516,
"step": 980
},
{
"epoch": 0.6669473684210526,
"grad_norm": 0.25807198882102966,
"learning_rate": 7.400749063670413e-05,
"loss": 3.2101,
"step": 990
},
{
"epoch": 0.6736842105263158,
"grad_norm": 0.06848172843456268,
"learning_rate": 7.250936329588015e-05,
"loss": 2.93,
"step": 1000
},
{
"epoch": 0.6804210526315789,
"grad_norm": 1.089575171470642,
"learning_rate": 7.101123595505618e-05,
"loss": 3.4925,
"step": 1010
},
{
"epoch": 0.6871578947368421,
"grad_norm": 0.20126965641975403,
"learning_rate": 6.951310861423222e-05,
"loss": 3.4603,
"step": 1020
},
{
"epoch": 0.6938947368421052,
"grad_norm": 0.21779027581214905,
"learning_rate": 6.801498127340824e-05,
"loss": 3.1723,
"step": 1030
},
{
"epoch": 0.7006315789473684,
"grad_norm": 0.18239159882068634,
"learning_rate": 6.651685393258428e-05,
"loss": 3.1903,
"step": 1040
},
{
"epoch": 0.7073684210526315,
"grad_norm": 0.06677573919296265,
"learning_rate": 6.50187265917603e-05,
"loss": 2.8445,
"step": 1050
},
{
"epoch": 0.7141052631578947,
"grad_norm": 0.42619746923446655,
"learning_rate": 6.352059925093634e-05,
"loss": 3.4319,
"step": 1060
},
{
"epoch": 0.7208421052631578,
"grad_norm": 0.12023507058620453,
"learning_rate": 6.202247191011237e-05,
"loss": 3.3826,
"step": 1070
},
{
"epoch": 0.7275789473684211,
"grad_norm": 0.15099403262138367,
"learning_rate": 6.052434456928839e-05,
"loss": 3.1425,
"step": 1080
},
{
"epoch": 0.7343157894736843,
"grad_norm": 0.3474717438220978,
"learning_rate": 5.902621722846442e-05,
"loss": 3.1279,
"step": 1090
},
{
"epoch": 0.7410526315789474,
"grad_norm": 0.12225649505853653,
"learning_rate": 5.752808988764046e-05,
"loss": 2.9033,
"step": 1100
},
{
"epoch": 0.7477894736842106,
"grad_norm": 0.19639068841934204,
"learning_rate": 5.6029962546816485e-05,
"loss": 3.3681,
"step": 1110
},
{
"epoch": 0.7545263157894737,
"grad_norm": 0.10571427643299103,
"learning_rate": 5.453183520599251e-05,
"loss": 3.335,
"step": 1120
},
{
"epoch": 0.7612631578947369,
"grad_norm": 0.5154901146888733,
"learning_rate": 5.3033707865168545e-05,
"loss": 3.0952,
"step": 1130
},
{
"epoch": 0.768,
"grad_norm": 0.6122628450393677,
"learning_rate": 5.153558052434457e-05,
"loss": 3.1269,
"step": 1140
},
{
"epoch": 0.7747368421052632,
"grad_norm": 0.19698569178581238,
"learning_rate": 5.00374531835206e-05,
"loss": 2.8233,
"step": 1150
},
{
"epoch": 0.7814736842105263,
"grad_norm": 0.13018374145030975,
"learning_rate": 4.853932584269663e-05,
"loss": 3.3094,
"step": 1160
},
{
"epoch": 0.7882105263157895,
"grad_norm": 0.09522128850221634,
"learning_rate": 4.704119850187266e-05,
"loss": 3.2765,
"step": 1170
},
{
"epoch": 0.7949473684210526,
"grad_norm": 0.10098107159137726,
"learning_rate": 4.554307116104869e-05,
"loss": 3.0807,
"step": 1180
},
{
"epoch": 0.8016842105263158,
"grad_norm": 0.18019132316112518,
"learning_rate": 4.404494382022472e-05,
"loss": 3.0332,
"step": 1190
},
{
"epoch": 0.8084210526315789,
"grad_norm": 0.16289708018302917,
"learning_rate": 4.2546816479400754e-05,
"loss": 2.7374,
"step": 1200
},
{
"epoch": 0.8151578947368421,
"grad_norm": 0.12666673958301544,
"learning_rate": 4.104868913857678e-05,
"loss": 3.2118,
"step": 1210
},
{
"epoch": 0.8218947368421052,
"grad_norm": 0.16891352832317352,
"learning_rate": 3.955056179775281e-05,
"loss": 3.1902,
"step": 1220
},
{
"epoch": 0.8286315789473684,
"grad_norm": 0.10958009213209152,
"learning_rate": 3.805243445692884e-05,
"loss": 2.9862,
"step": 1230
},
{
"epoch": 0.8353684210526315,
"grad_norm": 0.10642745345830917,
"learning_rate": 3.655430711610487e-05,
"loss": 3.0052,
"step": 1240
},
{
"epoch": 0.8421052631578947,
"grad_norm": 0.05656813085079193,
"learning_rate": 3.50561797752809e-05,
"loss": 2.723,
"step": 1250
},
{
"epoch": 0.8488421052631578,
"grad_norm": 0.08322717994451523,
"learning_rate": 3.355805243445693e-05,
"loss": 3.234,
"step": 1260
},
{
"epoch": 0.8555789473684211,
"grad_norm": 0.13246551156044006,
"learning_rate": 3.2059925093632956e-05,
"loss": 3.212,
"step": 1270
},
{
"epoch": 0.8623157894736843,
"grad_norm": 0.10225304961204529,
"learning_rate": 3.056179775280899e-05,
"loss": 2.9484,
"step": 1280
},
{
"epoch": 0.8690526315789474,
"grad_norm": 0.19440552592277527,
"learning_rate": 2.9063670411985024e-05,
"loss": 2.9266,
"step": 1290
},
{
"epoch": 0.8757894736842106,
"grad_norm": 0.08913037180900574,
"learning_rate": 2.7565543071161047e-05,
"loss": 2.6801,
"step": 1300
},
{
"epoch": 0.8825263157894737,
"grad_norm": 0.10815408080816269,
"learning_rate": 2.606741573033708e-05,
"loss": 3.1505,
"step": 1310
},
{
"epoch": 0.8892631578947369,
"grad_norm": 0.14371147751808167,
"learning_rate": 2.4569288389513108e-05,
"loss": 3.1293,
"step": 1320
},
{
"epoch": 0.896,
"grad_norm": 0.1680973470211029,
"learning_rate": 2.3071161048689138e-05,
"loss": 2.8961,
"step": 1330
},
{
"epoch": 0.9027368421052632,
"grad_norm": 0.19012019038200378,
"learning_rate": 2.157303370786517e-05,
"loss": 2.9096,
"step": 1340
},
{
"epoch": 0.9094736842105263,
"grad_norm": 0.060957688838243484,
"learning_rate": 2.00749063670412e-05,
"loss": 2.6879,
"step": 1350
},
{
"epoch": 0.9162105263157895,
"grad_norm": 0.15055014193058014,
"learning_rate": 1.857677902621723e-05,
"loss": 3.108,
"step": 1360
},
{
"epoch": 0.9229473684210526,
"grad_norm": 0.1378874033689499,
"learning_rate": 1.707865168539326e-05,
"loss": 3.0428,
"step": 1370
},
{
"epoch": 0.9296842105263158,
"grad_norm": 0.14901022613048553,
"learning_rate": 1.558052434456929e-05,
"loss": 2.8589,
"step": 1380
},
{
"epoch": 0.9364210526315789,
"grad_norm": 0.17515867948532104,
"learning_rate": 1.4082397003745318e-05,
"loss": 2.8563,
"step": 1390
},
{
"epoch": 0.9431578947368421,
"grad_norm": 0.11909812688827515,
"learning_rate": 1.258426966292135e-05,
"loss": 2.5759,
"step": 1400
},
{
"epoch": 0.9498947368421052,
"grad_norm": 0.16348549723625183,
"learning_rate": 1.1086142322097379e-05,
"loss": 3.089,
"step": 1410
},
{
"epoch": 0.9566315789473684,
"grad_norm": 0.08107765763998032,
"learning_rate": 9.588014981273409e-06,
"loss": 3.0145,
"step": 1420
},
{
"epoch": 0.9633684210526315,
"grad_norm": 0.13251617550849915,
"learning_rate": 8.089887640449438e-06,
"loss": 2.8256,
"step": 1430
},
{
"epoch": 0.9701052631578947,
"grad_norm": 0.10319063812494278,
"learning_rate": 6.591760299625469e-06,
"loss": 2.8456,
"step": 1440
},
{
"epoch": 0.9768421052631578,
"grad_norm": 0.08950542658567429,
"learning_rate": 5.093632958801498e-06,
"loss": 2.605,
"step": 1450
},
{
"epoch": 0.983578947368421,
"grad_norm": 0.08379487693309784,
"learning_rate": 3.5955056179775286e-06,
"loss": 3.0334,
"step": 1460
},
{
"epoch": 0.9903157894736843,
"grad_norm": 0.1561821848154068,
"learning_rate": 2.097378277153558e-06,
"loss": 3.0357,
"step": 1470
},
{
"epoch": 0.9970526315789474,
"grad_norm": 0.07574011385440826,
"learning_rate": 5.992509363295881e-07,
"loss": 2.7458,
"step": 1480
}
],
"logging_steps": 10,
"max_steps": 1484,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 2.32780044727799e+18,
"train_batch_size": 4,
"trial_name": null,
"trial_params": null
}