| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 10767, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.14, |
| "learning_rate": 0.0001, |
| "loss": 1.703, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.28, |
| "learning_rate": 0.0001, |
| "loss": 1.5833, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.42, |
| "learning_rate": 0.0001, |
| "loss": 1.5444, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.56, |
| "learning_rate": 0.0001, |
| "loss": 1.5199, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.7, |
| "learning_rate": 0.0001, |
| "loss": 1.4972, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.84, |
| "learning_rate": 0.0001, |
| "loss": 1.4789, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.98, |
| "learning_rate": 0.0001, |
| "loss": 1.448, |
| "step": 3500 |
| }, |
| { |
| "epoch": 1.11, |
| "learning_rate": 0.0001, |
| "loss": 1.3726, |
| "step": 4000 |
| }, |
| { |
| "epoch": 1.25, |
| "learning_rate": 0.0001, |
| "loss": 1.3587, |
| "step": 4500 |
| }, |
| { |
| "epoch": 1.39, |
| "learning_rate": 0.0001, |
| "loss": 1.3301, |
| "step": 5000 |
| }, |
| { |
| "epoch": 1.53, |
| "learning_rate": 0.0001, |
| "loss": 1.3619, |
| "step": 5500 |
| }, |
| { |
| "epoch": 1.67, |
| "learning_rate": 0.0001, |
| "loss": 1.3321, |
| "step": 6000 |
| }, |
| { |
| "epoch": 1.81, |
| "learning_rate": 0.0001, |
| "loss": 1.3311, |
| "step": 6500 |
| }, |
| { |
| "epoch": 1.95, |
| "learning_rate": 0.0001, |
| "loss": 1.328, |
| "step": 7000 |
| }, |
| { |
| "epoch": 2.09, |
| "learning_rate": 0.0001, |
| "loss": 1.2657, |
| "step": 7500 |
| }, |
| { |
| "epoch": 2.23, |
| "learning_rate": 0.0001, |
| "loss": 1.2526, |
| "step": 8000 |
| }, |
| { |
| "epoch": 2.37, |
| "learning_rate": 0.0001, |
| "loss": 1.2357, |
| "step": 8500 |
| }, |
| { |
| "epoch": 2.51, |
| "learning_rate": 0.0001, |
| "loss": 1.234, |
| "step": 9000 |
| }, |
| { |
| "epoch": 2.65, |
| "learning_rate": 0.0001, |
| "loss": 1.2335, |
| "step": 9500 |
| }, |
| { |
| "epoch": 2.79, |
| "learning_rate": 0.0001, |
| "loss": 1.2372, |
| "step": 10000 |
| }, |
| { |
| "epoch": 2.93, |
| "learning_rate": 0.0001, |
| "loss": 1.2362, |
| "step": 10500 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 10767, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "total_flos": 6298513440768.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|