{
  "best_metric": 1.652644395828247,
  "best_model_checkpoint": "output/nirvana/checkpoint-144",
  "epoch": 4.0,
  "global_step": 144,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.14,
      "learning_rate": 0.00013040646433810595,
      "loss": 2.8877,
      "step": 5
    },
    {
      "epoch": 0.29,
      "learning_rate": 0.00011137140040750913,
      "loss": 2.7784,
      "step": 10
    },
    {
      "epoch": 0.43,
      "learning_rate": 8.386493606940316e-05,
      "loss": 2.2959,
      "step": 15
    },
    {
      "epoch": 0.57,
      "learning_rate": 5.3335063930596836e-05,
      "loss": 2.5076,
      "step": 20
    },
    {
      "epoch": 0.71,
      "learning_rate": 2.5828599592490882e-05,
      "loss": 2.4917,
      "step": 25
    },
    {
      "epoch": 0.86,
      "learning_rate": 6.793535661894054e-06,
      "loss": 2.4772,
      "step": 30
    },
    {
      "epoch": 1.0,
      "learning_rate": 0.0,
      "loss": 2.6382,
      "step": 35
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.5190556049346924,
      "eval_runtime": 2.4884,
      "eval_samples_per_second": 22.504,
      "eval_steps_per_second": 2.813,
      "step": 35
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.3264973163604736,
      "eval_runtime": 1.1647,
      "eval_samples_per_second": 45.506,
      "eval_steps_per_second": 6.01,
      "step": 36
    },
    {
      "epoch": 1.11,
      "learning_rate": 4.137086214086682e-06,
      "loss": 2.6774,
      "step": 40
    },
    {
      "epoch": 1.25,
      "learning_rate": 2.009247481060283e-05,
      "loss": 2.4577,
      "step": 45
    },
    {
      "epoch": 1.39,
      "learning_rate": 4.513741816785908e-05,
      "loss": 2.2823,
      "step": 50
    },
    {
      "epoch": 1.53,
      "learning_rate": 7.457888395248933e-05,
      "loss": 2.298,
      "step": 55
    },
    {
      "epoch": 1.67,
      "learning_rate": 0.00010290000000000001,
      "loss": 2.4088,
      "step": 60
    },
    {
      "epoch": 1.81,
      "learning_rate": 0.00012479383023822482,
      "loss": 2.2944,
      "step": 65
    },
    {
      "epoch": 1.94,
      "learning_rate": 0.00013615781185663748,
      "loss": 2.0032,
      "step": 70
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.128573179244995,
      "eval_runtime": 1.1938,
      "eval_samples_per_second": 44.396,
      "eval_steps_per_second": 5.864,
      "step": 72
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.8826501369476318,
      "eval_runtime": 1.8444,
      "eval_samples_per_second": 22.772,
      "eval_steps_per_second": 3.253,
      "step": 74
    },
    {
      "epoch": 2.03,
      "learning_rate": 0.0001369528677140173,
      "loss": 2.0939,
      "step": 75
    },
    {
      "epoch": 2.16,
      "learning_rate": 0.00012848876816285752,
      "loss": 2.1654,
      "step": 80
    },
    {
      "epoch": 2.3,
      "learning_rate": 0.00010939183589447406,
      "loss": 2.0452,
      "step": 85
    },
    {
      "epoch": 2.43,
      "learning_rate": 8.305259792170682e-05,
      "loss": 1.7357,
      "step": 90
    },
    {
      "epoch": 2.57,
      "learning_rate": 5.414740207829316e-05,
      "loss": 2.0788,
      "step": 95
    },
    {
      "epoch": 2.7,
      "learning_rate": 2.7808164105525978e-05,
      "loss": 1.928,
      "step": 100
    },
    {
      "epoch": 2.84,
      "learning_rate": 8.711231837142545e-06,
      "loss": 1.9376,
      "step": 105
    },
    {
      "epoch": 2.97,
      "learning_rate": 2.4713228598268823e-07,
      "loss": 1.9652,
      "step": 110
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.7358663082122803,
      "eval_runtime": 1.8478,
      "eval_samples_per_second": 22.729,
      "eval_steps_per_second": 3.247,
      "step": 111
    },
    {
      "epoch": 3.19,
      "learning_rate": 1.2406169761775193e-05,
      "loss": 1.9469,
      "step": 115
    },
    {
      "epoch": 3.33,
      "learning_rate": 3.4300000000000014e-05,
      "loss": 1.7381,
      "step": 120
    },
    {
      "epoch": 3.47,
      "learning_rate": 6.262111604751063e-05,
      "loss": 1.8423,
      "step": 125
    },
    {
      "epoch": 3.61,
      "learning_rate": 9.206258183214083e-05,
      "loss": 1.6384,
      "step": 130
    },
    {
      "epoch": 3.75,
      "learning_rate": 0.0001171075251893971,
      "loss": 2.0094,
      "step": 135
    },
    {
      "epoch": 3.89,
      "learning_rate": 0.00013306291378591332,
      "loss": 1.8349,
      "step": 140
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.652644395828247,
      "eval_runtime": 0.6674,
      "eval_samples_per_second": 74.92,
      "eval_steps_per_second": 10.489,
      "step": 144
    }
  ],
  "max_steps": 144,
  "num_train_epochs": 4,
  "total_flos": 146715475968000.0,
  "trial_name": null,
  "trial_params": null
}