LM-Lexicon-8B-Dense-Wordnet / trainer_state.json
LM-Lexicon's picture
Update with new checkpoints
1f1de2e verified
{
"best_metric": 38.075,
"best_model_checkpoint": "/scratch2/nlp/liuyang/Meta-Llama-3-8B-wordnet/checkpoint-70",
"epoch": 5.0,
"eval_steps": 7,
"global_step": 70,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0,
"eval_generation_length": 42.36,
"eval_loss": 13.450104713439941,
"eval_rouge-l": 7.85,
"eval_runtime": 95.306,
"eval_samples_per_second": 18.624,
"eval_sentence_bleu_cpp": 3.995,
"eval_steps_per_second": 0.073,
"num_input_tokens_seen": 0,
"step": 0
},
{
"epoch": 0.07142857142857142,
"grad_norm": 80.78730010986328,
"learning_rate": 0.0,
"loss": 5.2505,
"num_input_tokens_seen": 43008,
"step": 1
},
{
"epoch": 0.5,
"eval_generation_length": 5.89,
"eval_loss": 15.164384841918945,
"eval_rouge-l": 30.204,
"eval_runtime": 65.9408,
"eval_samples_per_second": 26.918,
"eval_sentence_bleu_cpp": 31.676,
"eval_steps_per_second": 0.106,
"num_input_tokens_seen": 233472,
"step": 7
},
{
"epoch": 0.7142857142857143,
"grad_norm": 323848.53125,
"learning_rate": 4.36251434774578e-06,
"loss": 4.7148,
"num_input_tokens_seen": 324608,
"step": 10
},
{
"epoch": 1.0,
"eval_generation_length": 6.04,
"eval_loss": 14.542945861816406,
"eval_rouge-l": 30.355,
"eval_runtime": 68.4494,
"eval_samples_per_second": 25.932,
"eval_sentence_bleu_cpp": 31.014,
"eval_steps_per_second": 0.102,
"num_input_tokens_seen": 464896,
"step": 14
},
{
"epoch": 1.4285714285714286,
"grad_norm": 17938.568359375,
"learning_rate": 5e-06,
"loss": 5.7104,
"num_input_tokens_seen": 665600,
"step": 20
},
{
"epoch": 1.5,
"eval_generation_length": 5.02,
"eval_loss": 14.128557205200195,
"eval_rouge-l": 27.518,
"eval_runtime": 66.0406,
"eval_samples_per_second": 26.877,
"eval_sentence_bleu_cpp": 32.125,
"eval_steps_per_second": 0.106,
"num_input_tokens_seen": 699392,
"step": 21
},
{
"epoch": 2.0,
"eval_generation_length": 4.15,
"eval_loss": 13.396920204162598,
"eval_rouge-l": 25.92,
"eval_runtime": 64.4494,
"eval_samples_per_second": 27.541,
"eval_sentence_bleu_cpp": 33.766,
"eval_steps_per_second": 0.109,
"num_input_tokens_seen": 927744,
"step": 28
},
{
"epoch": 2.142857142857143,
"grad_norm": 14712.626953125,
"learning_rate": 5e-06,
"loss": 4.8626,
"num_input_tokens_seen": 1003520,
"step": 30
},
{
"epoch": 2.5,
"eval_generation_length": 4.53,
"eval_loss": 12.070054054260254,
"eval_rouge-l": 26.618,
"eval_runtime": 70.086,
"eval_samples_per_second": 25.326,
"eval_sentence_bleu_cpp": 32.807,
"eval_steps_per_second": 0.1,
"num_input_tokens_seen": 1162240,
"step": 35
},
{
"epoch": 2.857142857142857,
"grad_norm": 5062.14111328125,
"learning_rate": 5e-06,
"loss": 3.8264,
"num_input_tokens_seen": 1327104,
"step": 40
},
{
"epoch": 3.0,
"eval_generation_length": 5.41,
"eval_loss": 10.014744758605957,
"eval_rouge-l": 30.413,
"eval_runtime": 95.5382,
"eval_samples_per_second": 18.579,
"eval_sentence_bleu_cpp": 32.096,
"eval_steps_per_second": 0.073,
"num_input_tokens_seen": 1392640,
"step": 42
},
{
"epoch": 3.5,
"eval_generation_length": 5.92,
"eval_loss": 9.216021537780762,
"eval_rouge-l": 33.701,
"eval_runtime": 67.3876,
"eval_samples_per_second": 26.34,
"eval_sentence_bleu_cpp": 33.189,
"eval_steps_per_second": 0.104,
"num_input_tokens_seen": 1625088,
"step": 49
},
{
"epoch": 3.571428571428571,
"grad_norm": 691.280517578125,
"learning_rate": 5e-06,
"loss": 2.7447,
"num_input_tokens_seen": 1660928,
"step": 50
},
{
"epoch": 4.0,
"eval_generation_length": 6.03,
"eval_loss": 8.689969062805176,
"eval_rouge-l": 32.932,
"eval_runtime": 72.0579,
"eval_samples_per_second": 24.633,
"eval_sentence_bleu_cpp": 33.096,
"eval_steps_per_second": 0.097,
"num_input_tokens_seen": 1855488,
"step": 56
},
{
"epoch": 4.285714285714286,
"grad_norm": 106.33992004394531,
"learning_rate": 5e-06,
"loss": 2.3026,
"num_input_tokens_seen": 1995776,
"step": 60
},
{
"epoch": 4.5,
"eval_generation_length": 5.78,
"eval_loss": 8.175033569335938,
"eval_rouge-l": 34.975,
"eval_runtime": 69.4895,
"eval_samples_per_second": 25.543,
"eval_sentence_bleu_cpp": 35.756,
"eval_steps_per_second": 0.101,
"num_input_tokens_seen": 2088960,
"step": 63
},
{
"epoch": 5.0,
"grad_norm": 104.36761474609375,
"learning_rate": 5e-06,
"loss": 1.8254,
"num_input_tokens_seen": 2320384,
"step": 70
},
{
"epoch": 5.0,
"eval_generation_length": 5.01,
"eval_loss": 7.672922611236572,
"eval_rouge-l": 35.522,
"eval_runtime": 71.8708,
"eval_samples_per_second": 24.697,
"eval_sentence_bleu_cpp": 38.075,
"eval_steps_per_second": 0.097,
"num_input_tokens_seen": 2320384,
"step": 70
}
],
"logging_steps": 10,
"max_steps": 140,
"num_input_tokens_seen": 2320384,
"num_train_epochs": 10,
"save_steps": 7,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 1.0448584360276787e+17,
"train_batch_size": 128,
"trial_name": null,
"trial_params": null
}