sunnyanna's picture
Upload fine-tuned HyperCLOVAX model
204af40 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 10.0,
"eval_steps": 500,
"global_step": 450,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.22346368715083798,
"grad_norm": 2.651468276977539,
"learning_rate": 1.777777777777778e-05,
"loss": 3.6282,
"mean_token_accuracy": 0.4188130386173725,
"num_tokens": 25449.0,
"step": 10
},
{
"epoch": 0.44692737430167595,
"grad_norm": 1.5548869371414185,
"learning_rate": 4e-05,
"loss": 3.2525,
"mean_token_accuracy": 0.45206254720687866,
"num_tokens": 49961.0,
"step": 20
},
{
"epoch": 0.6703910614525139,
"grad_norm": 1.3944281339645386,
"learning_rate": 6.222222222222222e-05,
"loss": 2.7616,
"mean_token_accuracy": 0.4963876515626907,
"num_tokens": 74318.0,
"step": 30
},
{
"epoch": 0.8938547486033519,
"grad_norm": 1.1315394639968872,
"learning_rate": 8.444444444444444e-05,
"loss": 2.282,
"mean_token_accuracy": 0.5633564636111259,
"num_tokens": 99279.0,
"step": 40
},
{
"epoch": 1.111731843575419,
"grad_norm": 1.4461969137191772,
"learning_rate": 9.998646205897309e-05,
"loss": 1.7485,
"mean_token_accuracy": 0.6460915727493091,
"num_tokens": 123123.0,
"step": 50
},
{
"epoch": 1.3351955307262569,
"grad_norm": 1.323089599609375,
"learning_rate": 9.974599143895107e-05,
"loss": 1.2522,
"mean_token_accuracy": 0.7487043648958206,
"num_tokens": 147861.0,
"step": 60
},
{
"epoch": 1.558659217877095,
"grad_norm": 1.4107621908187866,
"learning_rate": 9.920634257308216e-05,
"loss": 0.6935,
"mean_token_accuracy": 0.8642275393009186,
"num_tokens": 173170.0,
"step": 70
},
{
"epoch": 1.7821229050279328,
"grad_norm": 1.5649642944335938,
"learning_rate": 9.837076097314319e-05,
"loss": 0.4389,
"mean_token_accuracy": 0.9179576024413109,
"num_tokens": 198201.0,
"step": 80
},
{
"epoch": 2.0,
"grad_norm": 0.7766520380973816,
"learning_rate": 9.72442719251944e-05,
"loss": 0.2782,
"mean_token_accuracy": 0.9465488699766306,
"num_tokens": 223068.0,
"step": 90
},
{
"epoch": 2.223463687150838,
"grad_norm": 0.8976675868034363,
"learning_rate": 9.583365026691784e-05,
"loss": 0.1842,
"mean_token_accuracy": 0.9619620949029922,
"num_tokens": 247368.0,
"step": 100
},
{
"epoch": 2.446927374301676,
"grad_norm": 1.256354808807373,
"learning_rate": 9.414737964294636e-05,
"loss": 0.1671,
"mean_token_accuracy": 0.9649594113230705,
"num_tokens": 272690.0,
"step": 110
},
{
"epoch": 2.6703910614525137,
"grad_norm": 0.5383216142654419,
"learning_rate": 9.219560148322654e-05,
"loss": 0.1653,
"mean_token_accuracy": 0.9639525949954987,
"num_tokens": 297796.0,
"step": 120
},
{
"epoch": 2.893854748603352,
"grad_norm": 0.49042603373527527,
"learning_rate": 8.99900540112658e-05,
"loss": 0.161,
"mean_token_accuracy": 0.9645605370402336,
"num_tokens": 323013.0,
"step": 130
},
{
"epoch": 3.111731843575419,
"grad_norm": 0.5440304279327393,
"learning_rate": 8.754400164907497e-05,
"loss": 0.1292,
"mean_token_accuracy": 0.9709367400560623,
"num_tokens": 346094.0,
"step": 140
},
{
"epoch": 3.335195530726257,
"grad_norm": 0.7135694026947021,
"learning_rate": 8.487215524337357e-05,
"loss": 0.1022,
"mean_token_accuracy": 0.9754688128829002,
"num_tokens": 370649.0,
"step": 150
},
{
"epoch": 3.558659217877095,
"grad_norm": 0.7237837910652161,
"learning_rate": 8.199058359282674e-05,
"loss": 0.1168,
"mean_token_accuracy": 0.9735488459467888,
"num_tokens": 396363.0,
"step": 160
},
{
"epoch": 3.782122905027933,
"grad_norm": 0.768436849117279,
"learning_rate": 7.891661680839932e-05,
"loss": 0.1004,
"mean_token_accuracy": 0.9762797430157661,
"num_tokens": 420757.0,
"step": 170
},
{
"epoch": 4.0,
"grad_norm": 0.5389193892478943,
"learning_rate": 7.566874208802938e-05,
"loss": 0.0921,
"mean_token_accuracy": 0.9779695364145132,
"num_tokens": 446136.0,
"step": 180
},
{
"epoch": 4.223463687150838,
"grad_norm": 0.8165420889854431,
"learning_rate": 7.226649253244448e-05,
"loss": 0.0758,
"mean_token_accuracy": 0.9822951450943946,
"num_tokens": 470938.0,
"step": 190
},
{
"epoch": 4.446927374301676,
"grad_norm": 0.596653938293457,
"learning_rate": 6.873032967079561e-05,
"loss": 0.0759,
"mean_token_accuracy": 0.9825226783752441,
"num_tokens": 494883.0,
"step": 200
},
{
"epoch": 4.670391061452514,
"grad_norm": 0.5423166751861572,
"learning_rate": 6.508152040261328e-05,
"loss": 0.0647,
"mean_token_accuracy": 0.9844422772526741,
"num_tokens": 521194.0,
"step": 210
},
{
"epoch": 4.893854748603352,
"grad_norm": 0.4943588376045227,
"learning_rate": 6.134200909617135e-05,
"loss": 0.0686,
"mean_token_accuracy": 0.9831502199172973,
"num_tokens": 546333.0,
"step": 220
},
{
"epoch": 5.111731843575419,
"grad_norm": 0.5471933484077454,
"learning_rate": 5.753428561247416e-05,
"loss": 0.0727,
"mean_token_accuracy": 0.9841189751258264,
"num_tokens": 569821.0,
"step": 230
},
{
"epoch": 5.335195530726257,
"grad_norm": 0.792306661605835,
"learning_rate": 5.368125004858624e-05,
"loss": 0.0567,
"mean_token_accuracy": 0.987368130683899,
"num_tokens": 595559.0,
"step": 240
},
{
"epoch": 5.558659217877095,
"grad_norm": 0.4141499698162079,
"learning_rate": 4.9806075013753995e-05,
"loss": 0.0519,
"mean_token_accuracy": 0.9876407846808434,
"num_tokens": 620822.0,
"step": 250
},
{
"epoch": 5.782122905027933,
"grad_norm": 0.560502290725708,
"learning_rate": 4.593206626660709e-05,
"loss": 0.0534,
"mean_token_accuracy": 0.9872279047966004,
"num_tokens": 645049.0,
"step": 260
},
{
"epoch": 6.0,
"grad_norm": 0.6270470023155212,
"learning_rate": 4.2082522551583867e-05,
"loss": 0.0556,
"mean_token_accuracy": 0.9871117854729677,
"num_tokens": 669204.0,
"step": 270
},
{
"epoch": 6.223463687150838,
"grad_norm": 0.41528642177581787,
"learning_rate": 3.828059547754077e-05,
"loss": 0.0426,
"mean_token_accuracy": 0.9900245934724807,
"num_tokens": 693949.0,
"step": 280
},
{
"epoch": 6.446927374301676,
"grad_norm": 0.3252829611301422,
"learning_rate": 3.4549150281252636e-05,
"loss": 0.0453,
"mean_token_accuracy": 0.9897840306162834,
"num_tokens": 718020.0,
"step": 290
},
{
"epoch": 6.670391061452514,
"grad_norm": 0.2989633083343506,
"learning_rate": 3.091062831318825e-05,
"loss": 0.0457,
"mean_token_accuracy": 0.9903687924146652,
"num_tokens": 742902.0,
"step": 300
},
{
"epoch": 6.893854748603352,
"grad_norm": 0.2880214750766754,
"learning_rate": 2.738691207258812e-05,
"loss": 0.0449,
"mean_token_accuracy": 0.9903770983219147,
"num_tokens": 768980.0,
"step": 310
},
{
"epoch": 7.111731843575419,
"grad_norm": 0.25206810235977173,
"learning_rate": 2.399919360353923e-05,
"loss": 0.0438,
"mean_token_accuracy": 0.990071585545173,
"num_tokens": 792798.0,
"step": 320
},
{
"epoch": 7.335195530726257,
"grad_norm": 0.3471860885620117,
"learning_rate": 2.076784704352835e-05,
"loss": 0.0396,
"mean_token_accuracy": 0.9913395941257477,
"num_tokens": 817561.0,
"step": 330
},
{
"epoch": 7.558659217877095,
"grad_norm": 0.21744246780872345,
"learning_rate": 1.7712306090981896e-05,
"loss": 0.0395,
"mean_token_accuracy": 0.9915344551205635,
"num_tokens": 843470.0,
"step": 340
},
{
"epoch": 7.782122905027933,
"grad_norm": 0.2922864556312561,
"learning_rate": 1.4850947128716913e-05,
"loss": 0.0387,
"mean_token_accuracy": 0.9912592649459839,
"num_tokens": 867744.0,
"step": 350
},
{
"epoch": 8.0,
"grad_norm": 0.3212120831012726,
"learning_rate": 1.2200978706212607e-05,
"loss": 0.0401,
"mean_token_accuracy": 0.9908862557166662,
"num_tokens": 892272.0,
"step": 360
},
{
"epoch": 8.223463687150838,
"grad_norm": 0.2715793550014496,
"learning_rate": 9.7783380453689e-06,
"loss": 0.0381,
"mean_token_accuracy": 0.9915784135460853,
"num_tokens": 917435.0,
"step": 370
},
{
"epoch": 8.446927374301676,
"grad_norm": 0.26999297738075256,
"learning_rate": 7.597595192178702e-06,
"loss": 0.0382,
"mean_token_accuracy": 0.9912244379520416,
"num_tokens": 942005.0,
"step": 380
},
{
"epoch": 8.670391061452515,
"grad_norm": 0.2515369653701782,
"learning_rate": 5.6718653907569475e-06,
"loss": 0.0381,
"mean_token_accuracy": 0.9913900807499886,
"num_tokens": 966911.0,
"step": 390
},
{
"epoch": 8.893854748603353,
"grad_norm": 0.24382378160953522,
"learning_rate": 4.012730206719229e-06,
"loss": 0.0379,
"mean_token_accuracy": 0.9922321572899818,
"num_tokens": 992856.0,
"step": 400
},
{
"epoch": 9.111731843575418,
"grad_norm": 0.2050442099571228,
"learning_rate": 2.63016787428354e-06,
"loss": 0.0353,
"mean_token_accuracy": 0.9915406214885223,
"num_tokens": 1016741.0,
"step": 410
},
{
"epoch": 9.335195530726256,
"grad_norm": 0.3232128918170929,
"learning_rate": 1.5324932859955399e-06,
"loss": 0.0374,
"mean_token_accuracy": 0.9917459413409233,
"num_tokens": 1042654.0,
"step": 420
},
{
"epoch": 9.558659217877095,
"grad_norm": 0.21526314318180084,
"learning_rate": 7.263079859864297e-07,
"loss": 0.0374,
"mean_token_accuracy": 0.9920469373464584,
"num_tokens": 1066827.0,
"step": 430
},
{
"epoch": 9.782122905027933,
"grad_norm": 0.20906753838062286,
"learning_rate": 2.1646046750978254e-07,
"loss": 0.0378,
"mean_token_accuracy": 0.9913564190268517,
"num_tokens": 1091448.0,
"step": 440
},
{
"epoch": 10.0,
"grad_norm": 0.21541918814182281,
"learning_rate": 6.017013532627624e-09,
"loss": 0.0337,
"mean_token_accuracy": 0.9919140201348525,
"num_tokens": 1115340.0,
"step": 450
}
],
"logging_steps": 10,
"max_steps": 450,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.0661326932393984e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}