TmpModel / saves /chess /no_explain /checkpoint-4000 /trainer_state.json
reasonwang's picture
Upload folder using huggingface_hub
a1506a1 verified
{
"best_metric": 0.029243575409054756,
"best_model_checkpoint": "saves/chess/no_explain/checkpoint-4000",
"epoch": 3.202643171806167,
"eval_steps": 1000,
"global_step": 4000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.08009611533840609,
"grad_norm": 0.8625897724596373,
"learning_rate": 4.006410256410257e-07,
"loss": 1.3897,
"step": 100
},
{
"epoch": 0.16019223067681218,
"grad_norm": 0.8895947937892531,
"learning_rate": 8.012820512820515e-07,
"loss": 0.0598,
"step": 200
},
{
"epoch": 0.24028834601521826,
"grad_norm": 0.5221246844134636,
"learning_rate": 1.201923076923077e-06,
"loss": 0.0551,
"step": 300
},
{
"epoch": 0.32038446135362436,
"grad_norm": 0.5590357289952654,
"learning_rate": 1.602564102564103e-06,
"loss": 0.0516,
"step": 400
},
{
"epoch": 0.4004805766920304,
"grad_norm": 0.36991974174438536,
"learning_rate": 2.0032051282051286e-06,
"loss": 0.0501,
"step": 500
},
{
"epoch": 0.4805766920304365,
"grad_norm": 0.6389443947236714,
"learning_rate": 2.403846153846154e-06,
"loss": 0.0486,
"step": 600
},
{
"epoch": 0.5606728073688426,
"grad_norm": 0.44563280571067243,
"learning_rate": 2.8044871794871797e-06,
"loss": 0.0463,
"step": 700
},
{
"epoch": 0.6407689227072487,
"grad_norm": 0.44266380357676305,
"learning_rate": 3.205128205128206e-06,
"loss": 0.0447,
"step": 800
},
{
"epoch": 0.7208650380456548,
"grad_norm": 0.585654631503778,
"learning_rate": 3.605769230769231e-06,
"loss": 0.0441,
"step": 900
},
{
"epoch": 0.8009611533840608,
"grad_norm": 0.600751877456253,
"learning_rate": 4.006410256410257e-06,
"loss": 0.0429,
"step": 1000
},
{
"epoch": 0.8009611533840608,
"eval_loss": 0.042210426181554794,
"eval_runtime": 97.133,
"eval_samples_per_second": 1462.17,
"eval_steps_per_second": 2.862,
"step": 1000
},
{
"epoch": 0.8810572687224669,
"grad_norm": 0.2641551118831142,
"learning_rate": 4.4070512820512826e-06,
"loss": 0.0414,
"step": 1100
},
{
"epoch": 0.961153384060873,
"grad_norm": 0.29049561928975876,
"learning_rate": 4.807692307692308e-06,
"loss": 0.0402,
"step": 1200
},
{
"epoch": 1.0408490188225872,
"grad_norm": 0.5344113116420023,
"learning_rate": 4.999735579817769e-06,
"loss": 0.0386,
"step": 1300
},
{
"epoch": 1.1209451341609933,
"grad_norm": 0.31257482202449377,
"learning_rate": 4.997740994288484e-06,
"loss": 0.0373,
"step": 1400
},
{
"epoch": 1.2010412494993994,
"grad_norm": 0.4593106982622164,
"learning_rate": 4.993792498360407e-06,
"loss": 0.0366,
"step": 1500
},
{
"epoch": 1.2811373648378055,
"grad_norm": 0.2012883704449717,
"learning_rate": 4.9878931808274796e-06,
"loss": 0.0357,
"step": 1600
},
{
"epoch": 1.3612334801762114,
"grad_norm": 0.22908626001592647,
"learning_rate": 4.980047656554856e-06,
"loss": 0.0352,
"step": 1700
},
{
"epoch": 1.4413295955146175,
"grad_norm": 0.3169879320183415,
"learning_rate": 4.970262062868821e-06,
"loss": 0.0346,
"step": 1800
},
{
"epoch": 1.5214257108530236,
"grad_norm": 0.2078878255601618,
"learning_rate": 4.958544054755741e-06,
"loss": 0.0336,
"step": 1900
},
{
"epoch": 1.6015218261914297,
"grad_norm": 0.2978110993331312,
"learning_rate": 4.944902798873794e-06,
"loss": 0.0329,
"step": 2000
},
{
"epoch": 1.6015218261914297,
"eval_loss": 0.03361953794956207,
"eval_runtime": 97.2876,
"eval_samples_per_second": 1459.847,
"eval_steps_per_second": 2.858,
"step": 2000
},
{
"epoch": 1.6816179415298358,
"grad_norm": 0.16678424956102253,
"learning_rate": 4.92934896638215e-06,
"loss": 0.0328,
"step": 2100
},
{
"epoch": 1.761714056868242,
"grad_norm": 0.19029664571581045,
"learning_rate": 4.91189472459324e-06,
"loss": 0.0316,
"step": 2200
},
{
"epoch": 1.841810172206648,
"grad_norm": 0.2388908631462674,
"learning_rate": 4.892553727454616e-06,
"loss": 0.0317,
"step": 2300
},
{
"epoch": 1.921906287545054,
"grad_norm": 0.15794270702360638,
"learning_rate": 4.8713411048678635e-06,
"loss": 0.0309,
"step": 2400
},
{
"epoch": 2.0016019223067683,
"grad_norm": 0.2103115075663395,
"learning_rate": 4.848273450852921e-06,
"loss": 0.0305,
"step": 2500
},
{
"epoch": 2.0816980376451744,
"grad_norm": 0.28601246983481904,
"learning_rate": 4.823368810567056e-06,
"loss": 0.0268,
"step": 2600
},
{
"epoch": 2.1617941529835805,
"grad_norm": 0.25522616878445004,
"learning_rate": 4.796646666188663e-06,
"loss": 0.0268,
"step": 2700
},
{
"epoch": 2.2418902683219866,
"grad_norm": 0.2343538332348778,
"learning_rate": 4.768127921676916e-06,
"loss": 0.0272,
"step": 2800
},
{
"epoch": 2.3219863836603922,
"grad_norm": 0.22903658893889398,
"learning_rate": 4.737834886419217e-06,
"loss": 0.0297,
"step": 2900
},
{
"epoch": 2.4020824989987988,
"grad_norm": 0.19855668130980528,
"learning_rate": 4.705791257779196e-06,
"loss": 0.0275,
"step": 3000
},
{
"epoch": 2.4020824989987988,
"eval_loss": 0.029653793200850487,
"eval_runtime": 97.2179,
"eval_samples_per_second": 1460.893,
"eval_steps_per_second": 2.86,
"step": 3000
},
{
"epoch": 2.4821786143372044,
"grad_norm": 0.1868527106405498,
"learning_rate": 4.672022102558958e-06,
"loss": 0.0269,
"step": 3100
},
{
"epoch": 2.562274729675611,
"grad_norm": 0.1985255713449175,
"learning_rate": 4.636553837390051e-06,
"loss": 0.0269,
"step": 3200
},
{
"epoch": 2.6423708450140166,
"grad_norm": 0.17528235376425527,
"learning_rate": 4.5994142080684956e-06,
"loss": 0.026,
"step": 3300
},
{
"epoch": 2.7224669603524227,
"grad_norm": 0.20238382028782428,
"learning_rate": 4.560632267850054e-06,
"loss": 0.026,
"step": 3400
},
{
"epoch": 2.802563075690829,
"grad_norm": 0.20789525240306345,
"learning_rate": 4.5202383547227134e-06,
"loss": 0.0257,
"step": 3500
},
{
"epoch": 2.882659191029235,
"grad_norm": 0.2849074845845128,
"learning_rate": 4.478264067674155e-06,
"loss": 0.0256,
"step": 3600
},
{
"epoch": 2.962755306367641,
"grad_norm": 0.1826392119567578,
"learning_rate": 4.43474224197278e-06,
"loss": 0.0255,
"step": 3700
},
{
"epoch": 3.0424509411293554,
"grad_norm": 0.3254043272458406,
"learning_rate": 4.389706923481633e-06,
"loss": 0.0224,
"step": 3800
},
{
"epoch": 3.122547056467761,
"grad_norm": 0.2695456046362865,
"learning_rate": 4.34319334202531e-06,
"loss": 0.0198,
"step": 3900
},
{
"epoch": 3.202643171806167,
"grad_norm": 0.24345073976828904,
"learning_rate": 4.2952378838306855e-06,
"loss": 0.0202,
"step": 4000
},
{
"epoch": 3.202643171806167,
"eval_loss": 0.029243575409054756,
"eval_runtime": 97.6159,
"eval_samples_per_second": 1454.937,
"eval_steps_per_second": 2.848,
"step": 4000
}
],
"logging_steps": 100,
"max_steps": 12480,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 1000,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 892260770119680.0,
"train_batch_size": 64,
"trial_name": null,
"trial_params": null
}