31c / trainer_state.json
EmiliaLee's picture
Upload folder using huggingface_hub
91cead1 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 682,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.014670823399963322,
"grad_norm": 4.686307907104492,
"learning_rate": 2.608695652173913e-05,
"loss": 1.086164951324463,
"step": 10
},
{
"epoch": 0.029341646799926645,
"grad_norm": 1.3878921270370483,
"learning_rate": 5.507246376811594e-05,
"loss": 0.13360737562179564,
"step": 20
},
{
"epoch": 0.04401247019988997,
"grad_norm": 0.7091318368911743,
"learning_rate": 8.405797101449276e-05,
"loss": 0.09536288380622863,
"step": 30
},
{
"epoch": 0.05868329359985329,
"grad_norm": 0.9854250550270081,
"learning_rate": 0.00011304347826086956,
"loss": 0.06767122745513916,
"step": 40
},
{
"epoch": 0.07335411699981662,
"grad_norm": 0.6373249888420105,
"learning_rate": 0.00014202898550724638,
"loss": 0.07146756052970886,
"step": 50
},
{
"epoch": 0.08802494039977994,
"grad_norm": 0.6071982979774475,
"learning_rate": 0.0001710144927536232,
"loss": 0.07726313471794129,
"step": 60
},
{
"epoch": 0.10269576379974327,
"grad_norm": 0.9975550174713135,
"learning_rate": 0.0002,
"loss": 0.06706693768501282,
"step": 70
},
{
"epoch": 0.11736658719970658,
"grad_norm": 0.5094273090362549,
"learning_rate": 0.00019986870332074194,
"loss": 0.07236328125,
"step": 80
},
{
"epoch": 0.13203741059966992,
"grad_norm": 0.7871644496917725,
"learning_rate": 0.00019947515805932744,
"loss": 0.07177542448043824,
"step": 90
},
{
"epoch": 0.14670823399963323,
"grad_norm": 1.741397738456726,
"learning_rate": 0.0001988203976394757,
"loss": 0.048826560378074646,
"step": 100
},
{
"epoch": 0.16137905739959654,
"grad_norm": 0.5896034836769104,
"learning_rate": 0.0001979061414185635,
"loss": 0.06617986559867858,
"step": 110
},
{
"epoch": 0.17604988079955988,
"grad_norm": 0.7956095337867737,
"learning_rate": 0.0001967347901727067,
"loss": 0.08305451273918152,
"step": 120
},
{
"epoch": 0.1907207041995232,
"grad_norm": 0.492136150598526,
"learning_rate": 0.0001953094197924819,
"loss": 0.06995530128479004,
"step": 130
},
{
"epoch": 0.20539152759948653,
"grad_norm": 0.6558647751808167,
"learning_rate": 0.00019363377320584174,
"loss": 0.05795600414276123,
"step": 140
},
{
"epoch": 0.22006235099944985,
"grad_norm": 0.5106936097145081,
"learning_rate": 0.000191712250549435,
"loss": 0.055801987648010254,
"step": 150
},
{
"epoch": 0.23473317439941316,
"grad_norm": 0.44069793820381165,
"learning_rate": 0.0001895498976141398,
"loss": 0.06355689167976379,
"step": 160
},
{
"epoch": 0.2494039977993765,
"grad_norm": 0.405668169260025,
"learning_rate": 0.00018715239259515184,
"loss": 0.05164743065834045,
"step": 170
},
{
"epoch": 0.26407482119933984,
"grad_norm": 0.3792116045951843,
"learning_rate": 0.00018452603118142112,
"loss": 0.0679062008857727,
"step": 180
},
{
"epoch": 0.2787456445993031,
"grad_norm": 0.6442562937736511,
"learning_rate": 0.00018167771002359072,
"loss": 0.06293455362319947,
"step": 190
},
{
"epoch": 0.29341646799926646,
"grad_norm": 0.5488963723182678,
"learning_rate": 0.0001786149086238503,
"loss": 0.0556623637676239,
"step": 200
},
{
"epoch": 0.3080872913992298,
"grad_norm": 0.29444584250450134,
"learning_rate": 0.0001753456696952601,
"loss": 0.0685071349143982,
"step": 210
},
{
"epoch": 0.3227581147991931,
"grad_norm": 0.414754718542099,
"learning_rate": 0.0001718785780421207,
"loss": 0.06181240677833557,
"step": 220
},
{
"epoch": 0.3374289381991564,
"grad_norm": 0.32311928272247314,
"learning_rate": 0.00016822273801684682,
"loss": 0.07574231624603271,
"step": 230
},
{
"epoch": 0.35209976159911976,
"grad_norm": 0.48792004585266113,
"learning_rate": 0.00016438774961254285,
"loss": 0.043923291563987735,
"step": 240
},
{
"epoch": 0.36677058499908305,
"grad_norm": 1.073688268661499,
"learning_rate": 0.00016038368325405834,
"loss": 0.05729702115058899,
"step": 250
},
{
"epoch": 0.3814414083990464,
"grad_norm": 0.5740509629249573,
"learning_rate": 0.00015622105335372127,
"loss": 0.06541360020637513,
"step": 260
},
{
"epoch": 0.39611223179900973,
"grad_norm": 0.6383430361747742,
"learning_rate": 0.0001519107907011895,
"loss": 0.057945191860198975,
"step": 270
},
{
"epoch": 0.41078305519897307,
"grad_norm": 0.46824830770492554,
"learning_rate": 0.0001474642137599232,
"loss": 0.05833690166473389,
"step": 280
},
{
"epoch": 0.42545387859893635,
"grad_norm": 0.689471960067749,
"learning_rate": 0.00014289299894565147,
"loss": 0.055763131380081175,
"step": 290
},
{
"epoch": 0.4401247019988997,
"grad_norm": 0.362657368183136,
"learning_rate": 0.00013820914996488004,
"loss": 0.07186369895935059,
"step": 300
},
{
"epoch": 0.45479552539886303,
"grad_norm": 0.40035027265548706,
"learning_rate": 0.00013342496629395538,
"loss": 0.06522900462150574,
"step": 310
},
{
"epoch": 0.4694663487988263,
"grad_norm": 0.4143030345439911,
"learning_rate": 0.00012855301088145652,
"loss": 0.040158060193061826,
"step": 320
},
{
"epoch": 0.48413717219878966,
"grad_norm": 0.24200226366519928,
"learning_rate": 0.0001236060771587266,
"loss": 0.05714940428733826,
"step": 330
},
{
"epoch": 0.498807995598753,
"grad_norm": 0.2773888111114502,
"learning_rate": 0.00011859715544517164,
"loss": 0.04442446827888489,
"step": 340
},
{
"epoch": 0.5134788189987163,
"grad_norm": 0.3424382209777832,
"learning_rate": 0.00011353939883654476,
"loss": 0.0499860942363739,
"step": 350
},
{
"epoch": 0.5281496423986797,
"grad_norm": 0.4579457938671112,
"learning_rate": 0.0001084460886657901,
"loss": 0.05433698296546936,
"step": 360
},
{
"epoch": 0.542820465798643,
"grad_norm": 0.6211843490600586,
"learning_rate": 0.00010333059962714469,
"loss": 0.048888799548149106,
"step": 370
},
{
"epoch": 0.5574912891986062,
"grad_norm": 0.5006217360496521,
"learning_rate": 9.820636465507961e-05,
"loss": 0.05106990933418274,
"step": 380
},
{
"epoch": 0.5721621125985696,
"grad_norm": 0.3458799719810486,
"learning_rate": 9.308683965030631e-05,
"loss": 0.03896746933460236,
"step": 390
},
{
"epoch": 0.5868329359985329,
"grad_norm": 0.29490038752555847,
"learning_rate": 8.798546814547487e-05,
"loss": 0.044534245133399965,
"step": 400
},
{
"epoch": 0.6015037593984962,
"grad_norm": 0.28574299812316895,
"learning_rate": 8.291564600335022e-05,
"loss": 0.04822182059288025,
"step": 410
},
{
"epoch": 0.6161745827984596,
"grad_norm": 0.3831021785736084,
"learning_rate": 7.789068624016616e-05,
"loss": 0.04143353998661041,
"step": 420
},
{
"epoch": 0.6308454061984229,
"grad_norm": 0.3398614525794983,
"learning_rate": 7.292378406652891e-05,
"loss": 0.039598295092582704,
"step": 430
},
{
"epoch": 0.6455162295983862,
"grad_norm": 0.22855930030345917,
"learning_rate": 6.802798223767044e-05,
"loss": 0.04850543141365051,
"step": 440
},
{
"epoch": 0.6601870529983496,
"grad_norm": 0.26312509179115295,
"learning_rate": 6.321613680403946e-05,
"loss": 0.05014724731445312,
"step": 450
},
{
"epoch": 0.6748578763983129,
"grad_norm": 0.343281090259552,
"learning_rate": 5.8500883352166715e-05,
"loss": 0.03788905143737793,
"step": 460
},
{
"epoch": 0.6895286997982761,
"grad_norm": 0.6305585503578186,
"learning_rate": 5.3894603824454056e-05,
"loss": 0.05027334094047546,
"step": 470
},
{
"epoch": 0.7041995231982395,
"grad_norm": 0.09157969057559967,
"learning_rate": 4.940939400501593e-05,
"loss": 0.04882456958293915,
"step": 480
},
{
"epoch": 0.7188703465982028,
"grad_norm": 0.2803705632686615,
"learning_rate": 4.505703175695366e-05,
"loss": 0.051465296745300294,
"step": 490
},
{
"epoch": 0.7335411699981661,
"grad_norm": 0.32852405309677124,
"learning_rate": 4.0848946094469334e-05,
"loss": 0.04661123156547546,
"step": 500
},
{
"epoch": 0.7482119933981295,
"grad_norm": 0.31889474391937256,
"learning_rate": 3.679618717103316e-05,
"loss": 0.04635309278964996,
"step": 510
},
{
"epoch": 0.7628828167980928,
"grad_norm": 0.2887394428253174,
"learning_rate": 3.2909397262414845e-05,
"loss": 0.04521143436431885,
"step": 520
},
{
"epoch": 0.7775536401980561,
"grad_norm": 0.4780530035495758,
"learning_rate": 2.9198782820773828e-05,
"loss": 0.041201579570770266,
"step": 530
},
{
"epoch": 0.7922244635980195,
"grad_norm": 0.4354000985622406,
"learning_rate": 2.5674087673194115e-05,
"loss": 0.036979615688323975,
"step": 540
},
{
"epoch": 0.8068952869979827,
"grad_norm": 0.11631964892148972,
"learning_rate": 2.2344567435041054e-05,
"loss": 0.03683710396289826,
"step": 550
},
{
"epoch": 0.8215661103979461,
"grad_norm": 0.40628868341445923,
"learning_rate": 1.9218965205330576e-05,
"loss": 0.04675011336803436,
"step": 560
},
{
"epoch": 0.8362369337979094,
"grad_norm": 0.31028568744659424,
"learning_rate": 1.6305488607931486e-05,
"loss": 0.033157148957252504,
"step": 570
},
{
"epoch": 0.8509077571978727,
"grad_norm": 0.26061493158340454,
"learning_rate": 1.3611788238890511e-05,
"loss": 0.04655841886997223,
"step": 580
},
{
"epoch": 0.8655785805978361,
"grad_norm": 0.2419964224100113,
"learning_rate": 1.114493757647508e-05,
"loss": 0.030328187346458434,
"step": 590
},
{
"epoch": 0.8802494039977994,
"grad_norm": 0.3614746332168579,
"learning_rate": 8.911414406689145e-06,
"loss": 0.03218616545200348,
"step": 600
},
{
"epoch": 0.8949202273977627,
"grad_norm": 0.37310630083084106,
"learning_rate": 6.9170838130375505e-06,
"loss": 0.04881116449832916,
"step": 610
},
{
"epoch": 0.9095910507977261,
"grad_norm": 0.2813904881477356,
"learning_rate": 5.167182775206026e-06,
"loss": 0.05659586191177368,
"step": 620
},
{
"epoch": 0.9242618741976893,
"grad_norm": 0.13661852478981018,
"learning_rate": 3.6663064171005956e-06,
"loss": 0.042176204919815066,
"step": 630
},
{
"epoch": 0.9389326975976526,
"grad_norm": 0.3304874300956726,
"learning_rate": 2.418395940357099e-06,
"loss": 0.05347890257835388,
"step": 640
},
{
"epoch": 0.953603520997616,
"grad_norm": 0.34348776936531067,
"learning_rate": 1.4267282750077493e-06,
"loss": 0.04495801329612732,
"step": 650
},
{
"epoch": 0.9682743443975793,
"grad_norm": 0.27644288539886475,
"learning_rate": 6.93907474480282e-07,
"loss": 0.04055593609809875,
"step": 660
},
{
"epoch": 0.9829451677975426,
"grad_norm": 0.2929557263851166,
"learning_rate": 2.2185787752672104e-07,
"loss": 0.03816842138767242,
"step": 670
},
{
"epoch": 0.997615991197506,
"grad_norm": 0.2137778252363205,
"learning_rate": 1.1819055037554095e-08,
"loss": 0.05327551364898682,
"step": 680
},
{
"epoch": 1.0,
"step": 682,
"total_flos": 3.3343785449628713e+18,
"train_loss": 0.07003523399807578,
"train_runtime": 66098.6339,
"train_samples_per_second": 0.165,
"train_steps_per_second": 0.01
}
],
"logging_steps": 10,
"max_steps": 682,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 3.3343785449628713e+18,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}