SecureCLI-Tuner-V2 / trainer_state.json
mwill-AImission's picture
Upload folder using huggingface_hub
8ce9395 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.20393596410727033,
"eval_steps": 50,
"global_step": 500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_loss": 4.004721641540527,
"eval_runtime": 173.7131,
"eval_samples_per_second": 7.052,
"eval_steps_per_second": 7.052,
"step": 0
},
{
"epoch": 0.004078719282145407,
"grad_norm": 0.7785049676895142,
"learning_rate": 3.6e-05,
"loss": 3.8826,
"step": 10
},
{
"epoch": 0.008157438564290813,
"grad_norm": 0.9169650077819824,
"learning_rate": 7.6e-05,
"loss": 3.8197,
"step": 20
},
{
"epoch": 0.012236157846436219,
"grad_norm": 1.1420930624008179,
"learning_rate": 0.000116,
"loss": 3.1268,
"step": 30
},
{
"epoch": 0.016314877128581626,
"grad_norm": 1.2675455808639526,
"learning_rate": 0.00015600000000000002,
"loss": 2.1309,
"step": 40
},
{
"epoch": 0.020393596410727032,
"grad_norm": 1.0368549823760986,
"learning_rate": 0.000196,
"loss": 1.5617,
"step": 50
},
{
"epoch": 0.020393596410727032,
"eval_loss": 1.423304557800293,
"eval_runtime": 173.1158,
"eval_samples_per_second": 7.076,
"eval_steps_per_second": 7.076,
"step": 50
},
{
"epoch": 0.024472315692872438,
"grad_norm": 0.8746965527534485,
"learning_rate": 0.00019980267284282717,
"loss": 1.3048,
"step": 60
},
{
"epoch": 0.028551034975017844,
"grad_norm": 4.214962959289551,
"learning_rate": 0.00019912155402515417,
"loss": 1.0715,
"step": 70
},
{
"epoch": 0.03262975425716325,
"grad_norm": 0.665006697177887,
"learning_rate": 0.0001979575249599344,
"loss": 1.0188,
"step": 80
},
{
"epoch": 0.03670847353930866,
"grad_norm": 0.5204000473022461,
"learning_rate": 0.00019631625667976583,
"loss": 0.9787,
"step": 90
},
{
"epoch": 0.040787192821454064,
"grad_norm": 0.5942240357398987,
"learning_rate": 0.00019420574527872968,
"loss": 0.9041,
"step": 100
},
{
"epoch": 0.040787192821454064,
"eval_loss": 0.9287890791893005,
"eval_runtime": 173.4553,
"eval_samples_per_second": 7.062,
"eval_steps_per_second": 7.062,
"step": 100
},
{
"epoch": 0.04486591210359947,
"grad_norm": 0.6731751561164856,
"learning_rate": 0.00019163627295622397,
"loss": 0.9193,
"step": 110
},
{
"epoch": 0.048944631385744876,
"grad_norm": 0.7434952855110168,
"learning_rate": 0.00018862035792312147,
"loss": 0.9247,
"step": 120
},
{
"epoch": 0.05302335066789028,
"grad_norm": 0.6485812664031982,
"learning_rate": 0.00018517269341430476,
"loss": 1.0027,
"step": 130
},
{
"epoch": 0.05710206995003569,
"grad_norm": 0.6013619303703308,
"learning_rate": 0.00018131007610470276,
"loss": 0.9267,
"step": 140
},
{
"epoch": 0.06118078923218109,
"grad_norm": 0.6937788724899292,
"learning_rate": 0.00017705132427757895,
"loss": 0.9152,
"step": 150
},
{
"epoch": 0.06118078923218109,
"eval_loss": 0.9047658443450928,
"eval_runtime": 173.377,
"eval_samples_per_second": 7.066,
"eval_steps_per_second": 7.066,
"step": 150
},
{
"epoch": 0.0652595085143265,
"grad_norm": 0.6242877244949341,
"learning_rate": 0.00017241718614374678,
"loss": 0.9777,
"step": 160
},
{
"epoch": 0.0693382277964719,
"grad_norm": 0.6055032014846802,
"learning_rate": 0.00016743023875837233,
"loss": 0.9655,
"step": 170
},
{
"epoch": 0.07341694707861732,
"grad_norm": 0.6116403937339783,
"learning_rate": 0.00016211477802783103,
"loss": 1.0206,
"step": 180
},
{
"epoch": 0.07749566636076272,
"grad_norm": 0.8313778042793274,
"learning_rate": 0.0001564967003424938,
"loss": 0.9532,
"step": 190
},
{
"epoch": 0.08157438564290813,
"grad_norm": 0.6586077213287354,
"learning_rate": 0.00015060337641211637,
"loss": 0.9392,
"step": 200
},
{
"epoch": 0.08157438564290813,
"eval_loss": 0.8928409218788147,
"eval_runtime": 173.3344,
"eval_samples_per_second": 7.067,
"eval_steps_per_second": 7.067,
"step": 200
},
{
"epoch": 0.08565310492505353,
"grad_norm": 0.6433130502700806,
"learning_rate": 0.00014446351791849276,
"loss": 0.9729,
"step": 210
},
{
"epoch": 0.08973182420719894,
"grad_norm": 0.7685410976409912,
"learning_rate": 0.00013810703763502744,
"loss": 0.9407,
"step": 220
},
{
"epoch": 0.09381054348934434,
"grad_norm": 0.7548204064369202,
"learning_rate": 0.00013156490369471027,
"loss": 0.8866,
"step": 230
},
{
"epoch": 0.09788926277148975,
"grad_norm": 0.6521336436271667,
"learning_rate": 0.0001248689887164855,
"loss": 0.8709,
"step": 240
},
{
"epoch": 0.10196798205363516,
"grad_norm": 0.7266818284988403,
"learning_rate": 0.00011805191452505602,
"loss": 0.977,
"step": 250
},
{
"epoch": 0.10196798205363516,
"eval_loss": 0.882000744342804,
"eval_runtime": 173.4293,
"eval_samples_per_second": 7.063,
"eval_steps_per_second": 7.063,
"step": 250
},
{
"epoch": 0.10604670133578056,
"grad_norm": 0.7059846520423889,
"learning_rate": 0.00011114689322063255,
"loss": 0.9515,
"step": 260
},
{
"epoch": 0.11012542061792598,
"grad_norm": 0.7184727787971497,
"learning_rate": 0.00010418756537291996,
"loss": 0.9049,
"step": 270
},
{
"epoch": 0.11420413990007137,
"grad_norm": 0.8101040124893188,
"learning_rate": 9.720783612764314e-05,
"loss": 0.9918,
"step": 280
},
{
"epoch": 0.11828285918221679,
"grad_norm": 0.5306759476661682,
"learning_rate": 9.024171002408506e-05,
"loss": 0.9756,
"step": 290
},
{
"epoch": 0.12236157846436219,
"grad_norm": 0.8896522521972656,
"learning_rate": 8.332312532838978e-05,
"loss": 0.8556,
"step": 300
},
{
"epoch": 0.12236157846436219,
"eval_loss": 0.873528003692627,
"eval_runtime": 173.4611,
"eval_samples_per_second": 7.062,
"eval_steps_per_second": 7.062,
"step": 300
},
{
"epoch": 0.12644029774650759,
"grad_norm": 0.715740978717804,
"learning_rate": 7.6485788689741e-05,
"loss": 0.8381,
"step": 310
},
{
"epoch": 0.130519017028653,
"grad_norm": 0.7031327486038208,
"learning_rate": 6.976301092495556e-05,
"loss": 0.8517,
"step": 320
},
{
"epoch": 0.1345977363107984,
"grad_norm": 0.6190944910049438,
"learning_rate": 6.318754473153221e-05,
"loss": 0.8552,
"step": 330
},
{
"epoch": 0.1386764555929438,
"grad_norm": 0.6262460947036743,
"learning_rate": 5.679142511980175e-05,
"loss": 0.9806,
"step": 340
},
{
"epoch": 0.14275517487508924,
"grad_norm": 0.5318371653556824,
"learning_rate": 5.0605813341576924e-05,
"loss": 0.8857,
"step": 350
},
{
"epoch": 0.14275517487508924,
"eval_loss": 0.8685455322265625,
"eval_runtime": 173.2693,
"eval_samples_per_second": 7.07,
"eval_steps_per_second": 7.07,
"step": 350
},
{
"epoch": 0.14683389415723463,
"grad_norm": 0.8289533853530884,
"learning_rate": 4.46608450756656e-05,
"loss": 0.9057,
"step": 360
},
{
"epoch": 0.15091261343938003,
"grad_norm": 0.7940821051597595,
"learning_rate": 3.8985483609873244e-05,
"loss": 0.8912,
"step": 370
},
{
"epoch": 0.15499133272152543,
"grad_norm": 0.6407303810119629,
"learning_rate": 3.360737873477584e-05,
"loss": 0.944,
"step": 380
},
{
"epoch": 0.15907005200367086,
"grad_norm": 0.741663932800293,
"learning_rate": 2.8552732036719687e-05,
"loss": 0.8752,
"step": 390
},
{
"epoch": 0.16314877128581626,
"grad_norm": 0.5255782604217529,
"learning_rate": 2.3846169246326343e-05,
"loss": 0.8946,
"step": 400
},
{
"epoch": 0.16314877128581626,
"eval_loss": 0.8629406690597534,
"eval_runtime": 173.2005,
"eval_samples_per_second": 7.073,
"eval_steps_per_second": 7.073,
"step": 400
},
{
"epoch": 0.16722749056796166,
"grad_norm": 0.6469287872314453,
"learning_rate": 1.9510620264408596e-05,
"loss": 0.9102,
"step": 410
},
{
"epoch": 0.17130620985010706,
"grad_norm": 0.7001602649688721,
"learning_rate": 1.5567207449798515e-05,
"loss": 0.8821,
"step": 420
},
{
"epoch": 0.17538492913225248,
"grad_norm": 0.9216477870941162,
"learning_rate": 1.2035142713338366e-05,
"loss": 0.931,
"step": 430
},
{
"epoch": 0.17946364841439788,
"grad_norm": 0.6395500898361206,
"learning_rate": 8.931633919382298e-06,
"loss": 0.8528,
"step": 440
},
{
"epoch": 0.18354236769654328,
"grad_norm": 0.7768850922584534,
"learning_rate": 6.2718010508108545e-06,
"loss": 0.8807,
"step": 450
},
{
"epoch": 0.18354236769654328,
"eval_loss": 0.8612557649612427,
"eval_runtime": 173.4106,
"eval_samples_per_second": 7.064,
"eval_steps_per_second": 7.064,
"step": 450
},
{
"epoch": 0.18762108697868868,
"grad_norm": 0.7101565003395081,
"learning_rate": 4.068602545994249e-06,
"loss": 0.9011,
"step": 460
},
{
"epoch": 0.1916998062608341,
"grad_norm": 0.7115808725357056,
"learning_rate": 2.332772166583208e-06,
"loss": 0.9117,
"step": 470
},
{
"epoch": 0.1957785255429795,
"grad_norm": 0.5874737501144409,
"learning_rate": 1.0727667037011668e-06,
"loss": 0.8661,
"step": 480
},
{
"epoch": 0.1998572448251249,
"grad_norm": 0.6979043483734131,
"learning_rate": 2.947247773079753e-07,
"loss": 0.9236,
"step": 490
},
{
"epoch": 0.20393596410727033,
"grad_norm": 0.7868551015853882,
"learning_rate": 2.4369294605253166e-09,
"loss": 0.813,
"step": 500
},
{
"epoch": 0.20393596410727033,
"eval_loss": 0.8608318567276001,
"eval_runtime": 173.2348,
"eval_samples_per_second": 7.071,
"eval_steps_per_second": 7.071,
"step": 500
}
],
"logging_steps": 10,
"max_steps": 500,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 100,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 4.3472888266752e+16,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}