8-clusters-balanced-lex-best-1 / trainer_state.json
MHGanainy's picture
MHGanainy/8-clusters-balanced-lex-best-1
8c61d11 verified
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 1.0,
"eval_steps": 500,
"global_step": 2888,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03462603878116344,
"grad_norm": 0.14943788945674896,
"learning_rate": 6.944444444444445e-06,
"loss": 2.3885,
"step": 100
},
{
"epoch": 0.06925207756232687,
"grad_norm": 0.2681087851524353,
"learning_rate": 1.388888888888889e-05,
"loss": 2.3454,
"step": 200
},
{
"epoch": 0.1038781163434903,
"grad_norm": 0.4204924404621124,
"learning_rate": 1.9998948817948157e-05,
"loss": 2.258,
"step": 300
},
{
"epoch": 0.13850415512465375,
"grad_norm": 0.5873008966445923,
"learning_rate": 1.9908568428746408e-05,
"loss": 2.165,
"step": 400
},
{
"epoch": 0.1731301939058172,
"grad_norm": 0.7970355153083801,
"learning_rate": 1.9673698799700582e-05,
"loss": 2.0815,
"step": 500
},
{
"epoch": 0.2077562326869806,
"grad_norm": 0.7298622131347656,
"learning_rate": 1.9297764858882516e-05,
"loss": 2.0287,
"step": 600
},
{
"epoch": 0.24238227146814403,
"grad_norm": 0.7727171778678894,
"learning_rate": 1.8786248569678847e-05,
"loss": 2.037,
"step": 700
},
{
"epoch": 0.2770083102493075,
"grad_norm": 0.791151225566864,
"learning_rate": 1.8146608991420533e-05,
"loss": 1.9875,
"step": 800
},
{
"epoch": 0.31163434903047094,
"grad_norm": 0.9811259508132935,
"learning_rate": 1.7388173509501475e-05,
"loss": 1.9107,
"step": 900
},
{
"epoch": 0.3462603878116344,
"grad_norm": 0.9305247068405151,
"learning_rate": 1.652200182109602e-05,
"loss": 1.8919,
"step": 1000
},
{
"epoch": 0.3808864265927978,
"grad_norm": 1.0283896923065186,
"learning_rate": 1.5560724659869905e-05,
"loss": 1.878,
"step": 1100
},
{
"epoch": 0.4155124653739612,
"grad_norm": 1.1012296676635742,
"learning_rate": 1.4518359611441452e-05,
"loss": 1.8174,
"step": 1200
},
{
"epoch": 0.45013850415512463,
"grad_norm": 1.0275688171386719,
"learning_rate": 1.3410106705418424e-05,
"loss": 1.8528,
"step": 1300
},
{
"epoch": 0.48476454293628807,
"grad_norm": 0.9701151251792908,
"learning_rate": 1.2252126764738845e-05,
"loss": 1.9103,
"step": 1400
},
{
"epoch": 0.5193905817174516,
"grad_norm": 0.9857981204986572,
"learning_rate": 1.106130574448156e-05,
"loss": 1.796,
"step": 1500
},
{
"epoch": 0.554016620498615,
"grad_norm": 1.2235304117202759,
"learning_rate": 9.855008496617326e-06,
"loss": 1.8084,
"step": 1600
},
{
"epoch": 0.5886426592797784,
"grad_norm": 1.137815237045288,
"learning_rate": 8.650825551364844e-06,
"loss": 1.878,
"step": 1700
},
{
"epoch": 0.6232686980609419,
"grad_norm": 1.0799235105514526,
"learning_rate": 7.4663166076497376e-06,
"loss": 1.8077,
"step": 1800
},
{
"epoch": 0.6578947368421053,
"grad_norm": 0.8830498456954956,
"learning_rate": 6.318754473153221e-06,
"loss": 1.8718,
"step": 1900
},
{
"epoch": 0.6925207756232687,
"grad_norm": 0.9494577050209045,
"learning_rate": 5.224873187881136e-06,
"loss": 1.8099,
"step": 2000
},
{
"epoch": 0.7271468144044322,
"grad_norm": 0.9683797955513,
"learning_rate": 4.200624004178883e-06,
"loss": 1.8241,
"step": 2100
},
{
"epoch": 0.7617728531855956,
"grad_norm": 1.5775721073150635,
"learning_rate": 3.2609427815531426e-06,
"loss": 1.7979,
"step": 2200
},
{
"epoch": 0.796398891966759,
"grad_norm": 1.1023374795913696,
"learning_rate": 2.4195321882076295e-06,
"loss": 1.8127,
"step": 2300
},
{
"epoch": 0.8310249307479224,
"grad_norm": 1.4638538360595703,
"learning_rate": 1.6886618852849723e-06,
"loss": 1.7909,
"step": 2400
},
{
"epoch": 0.8656509695290858,
"grad_norm": 0.9252508282661438,
"learning_rate": 1.0789896075783734e-06,
"loss": 1.818,
"step": 2500
},
{
"epoch": 0.9002770083102493,
"grad_norm": 0.8653609752655029,
"learning_rate": 5.994057497592032e-07,
"loss": 1.7445,
"step": 2600
},
{
"epoch": 0.9349030470914127,
"grad_norm": 1.0171947479248047,
"learning_rate": 2.569037244032657e-07,
"loss": 1.7914,
"step": 2700
},
{
"epoch": 0.9695290858725761,
"grad_norm": 1.0968868732452393,
"learning_rate": 5.647798228764156e-08,
"loss": 1.8258,
"step": 2800
},
{
"epoch": 1.0,
"eval_loss": 1.476217269897461,
"eval_runtime": 13.7874,
"eval_samples_per_second": 13.708,
"eval_steps_per_second": 1.741,
"step": 2888
},
{
"epoch": 1.0,
"step": 2888,
"total_flos": 5.2595085312e+16,
"train_loss": 1.9235466986151613,
"train_runtime": 918.9364,
"train_samples_per_second": 6.284,
"train_steps_per_second": 3.143
}
],
"logging_steps": 100,
"max_steps": 2888,
"num_input_tokens_seen": 0,
"num_train_epochs": 1,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 5.2595085312e+16,
"train_batch_size": 2,
"trial_name": null,
"trial_params": null
}