{
"best_metric": 0.8936182856559753,
"best_model_checkpoint": "./outputs/public-irc-mistral-24b/checkpoint-48",
"epoch": 2.974619289340102,
"eval_steps": 500,
"global_step": 72,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.04060913705583756,
"grad_norm": 3.373054027557373,
"learning_rate": 2e-05,
"loss": 1.2957,
"step": 1
},
{
"epoch": 0.08121827411167512,
"grad_norm": 3.194347381591797,
"learning_rate": 4e-05,
"loss": 1.3221,
"step": 2
},
{
"epoch": 0.1218274111675127,
"grad_norm": 1.2986558675765991,
"learning_rate": 6.000000000000001e-05,
"loss": 1.2683,
"step": 3
},
{
"epoch": 0.16243654822335024,
"grad_norm": 1.1605945825576782,
"learning_rate": 8e-05,
"loss": 1.1636,
"step": 4
},
{
"epoch": 0.20304568527918782,
"grad_norm": 0.811213493347168,
"learning_rate": 7.997668089464696e-05,
"loss": 1.0964,
"step": 5
},
{
"epoch": 0.2436548223350254,
"grad_norm": 0.7070867419242859,
"learning_rate": 7.990675076762158e-05,
"loss": 1.0897,
"step": 6
},
{
"epoch": 0.28426395939086296,
"grad_norm": 0.674956738948822,
"learning_rate": 7.97902911543238e-05,
"loss": 1.0602,
"step": 7
},
{
"epoch": 0.3248730964467005,
"grad_norm": 0.6653350591659546,
"learning_rate": 7.962743784145323e-05,
"loss": 1.0097,
"step": 8
},
{
"epoch": 0.36548223350253806,
"grad_norm": 0.6503349542617798,
"learning_rate": 7.941838070868787e-05,
"loss": 1.0102,
"step": 9
},
{
"epoch": 0.40609137055837563,
"grad_norm": 0.49681031703948975,
"learning_rate": 7.916336350729293e-05,
"loss": 1.0227,
"step": 10
},
{
"epoch": 0.4467005076142132,
"grad_norm": 0.5602617263793945,
"learning_rate": 7.886268357591766e-05,
"loss": 0.9935,
"step": 11
},
{
"epoch": 0.4873096446700508,
"grad_norm": 0.48682689666748047,
"learning_rate": 7.851669149391198e-05,
"loss": 0.9811,
"step": 12
},
{
"epoch": 0.5279187817258884,
"grad_norm": 0.5210645198822021,
"learning_rate": 7.812579067256644e-05,
"loss": 0.9828,
"step": 13
},
{
"epoch": 0.5685279187817259,
"grad_norm": 0.46042340993881226,
"learning_rate": 7.769043688475283e-05,
"loss": 0.9629,
"step": 14
},
{
"epoch": 0.6091370558375635,
"grad_norm": 0.4750231206417084,
"learning_rate": 7.721113773351333e-05,
"loss": 0.9599,
"step": 15
},
{
"epoch": 0.649746192893401,
"grad_norm": 0.44704967737197876,
"learning_rate": 7.668845206021812e-05,
"loss": 0.9417,
"step": 16
},
{
"epoch": 0.6903553299492385,
"grad_norm": 0.45399850606918335,
"learning_rate": 7.612298929298132e-05,
"loss": 0.9442,
"step": 17
},
{
"epoch": 0.7309644670050761,
"grad_norm": 0.48387065529823303,
"learning_rate": 7.551540873609502e-05,
"loss": 0.9388,
"step": 18
},
{
"epoch": 0.7715736040609137,
"grad_norm": 0.4469858705997467,
"learning_rate": 7.486641880131006e-05,
"loss": 0.9357,
"step": 19
},
{
"epoch": 0.8121827411167513,
"grad_norm": 0.4322652816772461,
"learning_rate": 7.417677618185955e-05,
"loss": 0.9191,
"step": 20
},
{
"epoch": 0.8527918781725888,
"grad_norm": 0.4220181703567505,
"learning_rate": 7.344728497018844e-05,
"loss": 0.9269,
"step": 21
},
{
"epoch": 0.8934010152284264,
"grad_norm": 0.4223184287548065,
"learning_rate": 7.267879572041768e-05,
"loss": 0.9092,
"step": 22
},
{
"epoch": 0.934010152284264,
"grad_norm": 0.5329856276512146,
"learning_rate": 7.187220445663618e-05,
"loss": 0.8954,
"step": 23
},
{
"epoch": 0.9746192893401016,
"grad_norm": 0.504860520362854,
"learning_rate": 7.10284516281768e-05,
"loss": 0.9145,
"step": 24
},
{
"epoch": 0.9746192893401016,
"eval_loss": 0.9128336310386658,
"eval_runtime": 89.3062,
"eval_samples_per_second": 0.84,
"eval_steps_per_second": 0.426,
"step": 24
},
{
"epoch": 1.0406091370558375,
"grad_norm": 1.4868385791778564,
"learning_rate": 7.014852101309445e-05,
"loss": 1.6092,
"step": 25
},
{
"epoch": 1.0812182741116751,
"grad_norm": 0.43014273047447205,
"learning_rate": 6.923343857112497e-05,
"loss": 0.7399,
"step": 26
},
{
"epoch": 1.1218274111675126,
"grad_norm": 0.4565065801143646,
"learning_rate": 6.828427124746191e-05,
"loss": 0.7567,
"step": 27
},
{
"epoch": 1.1624365482233503,
"grad_norm": 0.511966347694397,
"learning_rate": 6.730212572874618e-05,
"loss": 0.7053,
"step": 28
},
{
"epoch": 1.2030456852791878,
"grad_norm": 0.4735160171985626,
"learning_rate": 6.628814715271891e-05,
"loss": 0.726,
"step": 29
},
{
"epoch": 1.2436548223350254,
"grad_norm": 0.5390828251838684,
"learning_rate": 6.524351777304212e-05,
"loss": 0.7107,
"step": 30
},
{
"epoch": 1.284263959390863,
"grad_norm": 0.45148932933807373,
"learning_rate": 6.416945558084379e-05,
"loss": 0.7038,
"step": 31
},
{
"epoch": 1.3248730964467006,
"grad_norm": 0.4815531075000763,
"learning_rate": 6.30672128845947e-05,
"loss": 0.6963,
"step": 32
},
{
"epoch": 1.365482233502538,
"grad_norm": 0.47509998083114624,
"learning_rate": 6.193807484997275e-05,
"loss": 0.7048,
"step": 33
},
{
"epoch": 1.4060913705583755,
"grad_norm": 0.4874464273452759,
"learning_rate": 6.078335800141735e-05,
"loss": 0.7139,
"step": 34
},
{
"epoch": 1.4467005076142132,
"grad_norm": 0.46367257833480835,
"learning_rate": 5.96044086871207e-05,
"loss": 0.6811,
"step": 35
},
{
"epoch": 1.487309644670051,
"grad_norm": 0.44525542855262756,
"learning_rate": 5.840260150924609e-05,
"loss": 0.6935,
"step": 36
},
{
"epoch": 1.5279187817258884,
"grad_norm": 0.4398341774940491,
"learning_rate": 5.717933772120329e-05,
"loss": 0.7027,
"step": 37
},
{
"epoch": 1.5685279187817258,
"grad_norm": 0.42870181798934937,
"learning_rate": 5.593604359384967e-05,
"loss": 0.6933,
"step": 38
},
{
"epoch": 1.6091370558375635,
"grad_norm": 0.4329513609409332,
"learning_rate": 5.467416875252227e-05,
"loss": 0.6596,
"step": 39
},
{
"epoch": 1.649746192893401,
"grad_norm": 0.44533371925354004,
"learning_rate": 5.339518448683945e-05,
"loss": 0.6675,
"step": 40
},
{
"epoch": 1.6903553299492384,
"grad_norm": 0.45296040177345276,
"learning_rate": 5.210058203524304e-05,
"loss": 0.6794,
"step": 41
},
{
"epoch": 1.7309644670050761,
"grad_norm": 0.46650972962379456,
"learning_rate": 5.0791870846280974e-05,
"loss": 0.6615,
"step": 42
},
{
"epoch": 1.7715736040609138,
"grad_norm": 0.43552765250205994,
"learning_rate": 4.9470576818657873e-05,
"loss": 0.6594,
"step": 43
},
{
"epoch": 1.8121827411167513,
"grad_norm": 0.44811365008354187,
"learning_rate": 4.8138240522105365e-05,
"loss": 0.6609,
"step": 44
},
{
"epoch": 1.8527918781725887,
"grad_norm": 0.45581039786338806,
"learning_rate": 4.679641540114667e-05,
"loss": 0.6727,
"step": 45
},
{
"epoch": 1.8934010152284264,
"grad_norm": 0.4468774199485779,
"learning_rate": 4.5446665963849874e-05,
"loss": 0.6528,
"step": 46
},
{
"epoch": 1.934010152284264,
"grad_norm": 0.44559335708618164,
"learning_rate": 4.409056595768137e-05,
"loss": 0.6722,
"step": 47
},
{
"epoch": 1.9746192893401016,
"grad_norm": 0.48416054248809814,
"learning_rate": 4.272969653458685e-05,
"loss": 0.6565,
"step": 48
},
{
"epoch": 1.9746192893401016,
"eval_loss": 0.8936182856559753,
"eval_runtime": 89.4083,
"eval_samples_per_second": 0.839,
"eval_steps_per_second": 0.425,
"step": 48
},
{
"epoch": 2.0406091370558377,
"grad_norm": 1.8757866621017456,
"learning_rate": 4.136564440743872e-05,
"loss": 1.1625,
"step": 49
},
{
"epoch": 2.081218274111675,
"grad_norm": 0.5622301697731018,
"learning_rate": 4e-05,
"loss": 0.5237,
"step": 50
},
{
"epoch": 2.1218274111675126,
"grad_norm": 0.4795697331428528,
"learning_rate": 3.8634355592561286e-05,
"loss": 0.5147,
"step": 51
},
{
"epoch": 2.1624365482233503,
"grad_norm": 0.9597147703170776,
"learning_rate": 3.727030346541317e-05,
"loss": 0.5149,
"step": 52
},
{
"epoch": 2.203045685279188,
"grad_norm": 0.629220187664032,
"learning_rate": 3.590943404231863e-05,
"loss": 0.4905,
"step": 53
},
{
"epoch": 2.2436548223350252,
"grad_norm": 0.46845948696136475,
"learning_rate": 3.4553334036150146e-05,
"loss": 0.488,
"step": 54
},
{
"epoch": 2.284263959390863,
"grad_norm": 0.502011239528656,
"learning_rate": 3.3203584598853335e-05,
"loss": 0.4628,
"step": 55
},
{
"epoch": 2.3248730964467006,
"grad_norm": 0.5105593800544739,
"learning_rate": 3.1861759477894656e-05,
"loss": 0.4941,
"step": 56
},
{
"epoch": 2.3654822335025383,
"grad_norm": 0.48986580967903137,
"learning_rate": 3.052942318134213e-05,
"loss": 0.4881,
"step": 57
},
{
"epoch": 2.4060913705583755,
"grad_norm": 0.5030830502510071,
"learning_rate": 2.9208129153719026e-05,
"loss": 0.4953,
"step": 58
},
{
"epoch": 2.446700507614213,
"grad_norm": 0.47233346104621887,
"learning_rate": 2.7899417964756973e-05,
"loss": 0.4572,
"step": 59
},
{
"epoch": 2.487309644670051,
"grad_norm": 0.49828535318374634,
"learning_rate": 2.6604815513160556e-05,
"loss": 0.472,
"step": 60
},
{
"epoch": 2.527918781725888,
"grad_norm": 0.4967016577720642,
"learning_rate": 2.5325831247477747e-05,
"loss": 0.4835,
"step": 61
},
{
"epoch": 2.568527918781726,
"grad_norm": 0.5143364667892456,
"learning_rate": 2.4063956406150345e-05,
"loss": 0.4703,
"step": 62
},
{
"epoch": 2.6091370558375635,
"grad_norm": 0.5132246613502502,
"learning_rate": 2.282066227879673e-05,
"loss": 0.4804,
"step": 63
},
{
"epoch": 2.649746192893401,
"grad_norm": 0.4817405641078949,
"learning_rate": 2.1597398490753917e-05,
"loss": 0.4722,
"step": 64
},
{
"epoch": 2.6903553299492384,
"grad_norm": 0.4971908926963806,
"learning_rate": 2.0395591312879324e-05,
"loss": 0.4817,
"step": 65
},
{
"epoch": 2.730964467005076,
"grad_norm": 0.48459553718566895,
"learning_rate": 1.9216641998582666e-05,
"loss": 0.4628,
"step": 66
},
{
"epoch": 2.771573604060914,
"grad_norm": 0.4817572832107544,
"learning_rate": 1.8061925150027244e-05,
"loss": 0.4633,
"step": 67
},
{
"epoch": 2.812182741116751,
"grad_norm": 0.4870697855949402,
"learning_rate": 1.6932787115405318e-05,
"loss": 0.4683,
"step": 68
},
{
"epoch": 2.8527918781725887,
"grad_norm": 0.49406981468200684,
"learning_rate": 1.5830544419156223e-05,
"loss": 0.4738,
"step": 69
},
{
"epoch": 2.8934010152284264,
"grad_norm": 0.4873085021972656,
"learning_rate": 1.47564822269579e-05,
"loss": 0.4624,
"step": 70
},
{
"epoch": 2.934010152284264,
"grad_norm": 0.851365864276886,
"learning_rate": 1.3711852847281098e-05,
"loss": 0.4654,
"step": 71
},
{
"epoch": 2.974619289340102,
"grad_norm": 0.4922949969768524,
"learning_rate": 1.2697874271253844e-05,
"loss": 0.4671,
"step": 72
},
{
"epoch": 2.974619289340102,
"eval_loss": 0.9503005743026733,
"eval_runtime": 89.423,
"eval_samples_per_second": 0.839,
"eval_steps_per_second": 0.425,
"step": 72
}
],
"logging_steps": 1,
"max_steps": 96,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.342087885624443e+18,
"train_batch_size": 1,
"trial_name": null,
"trial_params": null
}