| { |
| "best_metric": 38.075, |
| "best_model_checkpoint": "/scratch2/nlp/liuyang/Meta-Llama-3-8B-wordnet/checkpoint-70", |
| "epoch": 5.0, |
| "eval_steps": 7, |
| "global_step": 70, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0, |
| "eval_generation_length": 42.36, |
| "eval_loss": 13.450104713439941, |
| "eval_rouge-l": 7.85, |
| "eval_runtime": 95.306, |
| "eval_samples_per_second": 18.624, |
| "eval_sentence_bleu_cpp": 3.995, |
| "eval_steps_per_second": 0.073, |
| "num_input_tokens_seen": 0, |
| "step": 0 |
| }, |
| { |
| "epoch": 0.07142857142857142, |
| "grad_norm": 80.78730010986328, |
| "learning_rate": 0.0, |
| "loss": 5.2505, |
| "num_input_tokens_seen": 43008, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.5, |
| "eval_generation_length": 5.89, |
| "eval_loss": 15.164384841918945, |
| "eval_rouge-l": 30.204, |
| "eval_runtime": 65.9408, |
| "eval_samples_per_second": 26.918, |
| "eval_sentence_bleu_cpp": 31.676, |
| "eval_steps_per_second": 0.106, |
| "num_input_tokens_seen": 233472, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.7142857142857143, |
| "grad_norm": 323848.53125, |
| "learning_rate": 4.36251434774578e-06, |
| "loss": 4.7148, |
| "num_input_tokens_seen": 324608, |
| "step": 10 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_generation_length": 6.04, |
| "eval_loss": 14.542945861816406, |
| "eval_rouge-l": 30.355, |
| "eval_runtime": 68.4494, |
| "eval_samples_per_second": 25.932, |
| "eval_sentence_bleu_cpp": 31.014, |
| "eval_steps_per_second": 0.102, |
| "num_input_tokens_seen": 464896, |
| "step": 14 |
| }, |
| { |
| "epoch": 1.4285714285714286, |
| "grad_norm": 17938.568359375, |
| "learning_rate": 5e-06, |
| "loss": 5.7104, |
| "num_input_tokens_seen": 665600, |
| "step": 20 |
| }, |
| { |
| "epoch": 1.5, |
| "eval_generation_length": 5.02, |
| "eval_loss": 14.128557205200195, |
| "eval_rouge-l": 27.518, |
| "eval_runtime": 66.0406, |
| "eval_samples_per_second": 26.877, |
| "eval_sentence_bleu_cpp": 32.125, |
| "eval_steps_per_second": 0.106, |
| "num_input_tokens_seen": 699392, |
| "step": 21 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_generation_length": 4.15, |
| "eval_loss": 13.396920204162598, |
| "eval_rouge-l": 25.92, |
| "eval_runtime": 64.4494, |
| "eval_samples_per_second": 27.541, |
| "eval_sentence_bleu_cpp": 33.766, |
| "eval_steps_per_second": 0.109, |
| "num_input_tokens_seen": 927744, |
| "step": 28 |
| }, |
| { |
| "epoch": 2.142857142857143, |
| "grad_norm": 14712.626953125, |
| "learning_rate": 5e-06, |
| "loss": 4.8626, |
| "num_input_tokens_seen": 1003520, |
| "step": 30 |
| }, |
| { |
| "epoch": 2.5, |
| "eval_generation_length": 4.53, |
| "eval_loss": 12.070054054260254, |
| "eval_rouge-l": 26.618, |
| "eval_runtime": 70.086, |
| "eval_samples_per_second": 25.326, |
| "eval_sentence_bleu_cpp": 32.807, |
| "eval_steps_per_second": 0.1, |
| "num_input_tokens_seen": 1162240, |
| "step": 35 |
| }, |
| { |
| "epoch": 2.857142857142857, |
| "grad_norm": 5062.14111328125, |
| "learning_rate": 5e-06, |
| "loss": 3.8264, |
| "num_input_tokens_seen": 1327104, |
| "step": 40 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_generation_length": 5.41, |
| "eval_loss": 10.014744758605957, |
| "eval_rouge-l": 30.413, |
| "eval_runtime": 95.5382, |
| "eval_samples_per_second": 18.579, |
| "eval_sentence_bleu_cpp": 32.096, |
| "eval_steps_per_second": 0.073, |
| "num_input_tokens_seen": 1392640, |
| "step": 42 |
| }, |
| { |
| "epoch": 3.5, |
| "eval_generation_length": 5.92, |
| "eval_loss": 9.216021537780762, |
| "eval_rouge-l": 33.701, |
| "eval_runtime": 67.3876, |
| "eval_samples_per_second": 26.34, |
| "eval_sentence_bleu_cpp": 33.189, |
| "eval_steps_per_second": 0.104, |
| "num_input_tokens_seen": 1625088, |
| "step": 49 |
| }, |
| { |
| "epoch": 3.571428571428571, |
| "grad_norm": 691.280517578125, |
| "learning_rate": 5e-06, |
| "loss": 2.7447, |
| "num_input_tokens_seen": 1660928, |
| "step": 50 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_generation_length": 6.03, |
| "eval_loss": 8.689969062805176, |
| "eval_rouge-l": 32.932, |
| "eval_runtime": 72.0579, |
| "eval_samples_per_second": 24.633, |
| "eval_sentence_bleu_cpp": 33.096, |
| "eval_steps_per_second": 0.097, |
| "num_input_tokens_seen": 1855488, |
| "step": 56 |
| }, |
| { |
| "epoch": 4.285714285714286, |
| "grad_norm": 106.33992004394531, |
| "learning_rate": 5e-06, |
| "loss": 2.3026, |
| "num_input_tokens_seen": 1995776, |
| "step": 60 |
| }, |
| { |
| "epoch": 4.5, |
| "eval_generation_length": 5.78, |
| "eval_loss": 8.175033569335938, |
| "eval_rouge-l": 34.975, |
| "eval_runtime": 69.4895, |
| "eval_samples_per_second": 25.543, |
| "eval_sentence_bleu_cpp": 35.756, |
| "eval_steps_per_second": 0.101, |
| "num_input_tokens_seen": 2088960, |
| "step": 63 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 104.36761474609375, |
| "learning_rate": 5e-06, |
| "loss": 1.8254, |
| "num_input_tokens_seen": 2320384, |
| "step": 70 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_generation_length": 5.01, |
| "eval_loss": 7.672922611236572, |
| "eval_rouge-l": 35.522, |
| "eval_runtime": 71.8708, |
| "eval_samples_per_second": 24.697, |
| "eval_sentence_bleu_cpp": 38.075, |
| "eval_steps_per_second": 0.097, |
| "num_input_tokens_seen": 2320384, |
| "step": 70 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 140, |
| "num_input_tokens_seen": 2320384, |
| "num_train_epochs": 10, |
| "save_steps": 7, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.0448584360276787e+17, |
| "train_batch_size": 128, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|