| { |
| "best_global_step": 150, |
| "best_metric": 0.18762432038784027, |
| "best_model_checkpoint": "models/sft+lora/meta-llama_Meta-Llama-3.1-8B-Instruct/checkpoint-150", |
| "epoch": 2.8846153846153846, |
| "eval_steps": 50, |
| "global_step": 150, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.4807692307692308, |
| "grad_norm": 1.6045993566513062, |
| "learning_rate": 0.00016923076923076923, |
| "loss": 0.9727, |
| "mean_token_accuracy": 0.7842577388882637, |
| "num_tokens": 28261.0, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.9615384615384616, |
| "grad_norm": 1.9574581384658813, |
| "learning_rate": 0.00013717948717948718, |
| "loss": 0.3981, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.9615384615384616, |
| "eval_loss": 0.25703853368759155, |
| "eval_mean_token_accuracy": 0.9377317343439374, |
| "eval_num_tokens": 56396.0, |
| "eval_runtime": 2.1687, |
| "eval_samples_per_second": 24.438, |
| "eval_steps_per_second": 3.228, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.4423076923076923, |
| "grad_norm": 1.1697373390197754, |
| "learning_rate": 0.00010512820512820514, |
| "loss": 0.1858, |
| "mean_token_accuracy": 0.9360612297058105, |
| "num_tokens": 84639.0, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "grad_norm": 1.019529104232788, |
| "learning_rate": 7.307692307692307e-05, |
| "loss": 0.1702, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.9230769230769231, |
| "eval_loss": 0.211117222905159, |
| "eval_mean_token_accuracy": 0.9496188674654279, |
| "eval_num_tokens": 113022.0, |
| "eval_runtime": 2.1669, |
| "eval_samples_per_second": 24.459, |
| "eval_steps_per_second": 3.23, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.4038461538461537, |
| "grad_norm": 0.7372673153877258, |
| "learning_rate": 4.1025641025641023e-05, |
| "loss": 0.0979, |
| "mean_token_accuracy": 0.9645662650465965, |
| "num_tokens": 140836.0, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.8846153846153846, |
| "grad_norm": 0.6960735321044922, |
| "learning_rate": 8.974358974358976e-06, |
| "loss": 0.091, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.8846153846153846, |
| "eval_loss": 0.18762432038784027, |
| "eval_mean_token_accuracy": 0.9574507900646755, |
| "eval_num_tokens": 169445.0, |
| "eval_runtime": 2.1705, |
| "eval_samples_per_second": 24.418, |
| "eval_steps_per_second": 3.225, |
| "step": 150 |
| } |
| ], |
| "logging_steps": 25, |
| "max_steps": 156, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 50, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 8185446818242560.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|