| { |
| "best_metric": 0.9126, |
| "best_model_checkpoint": "../../checkpoint/imdb/roberta-large/checkpoint-14858", |
| "epoch": 19.0, |
| "eval_steps": 500, |
| "global_step": 14858, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 1.0, |
| "eval_accuracy": 0.89904, |
| "eval_loss": 0.2406211644411087, |
| "eval_runtime": 103.5694, |
| "eval_samples_per_second": 241.384, |
| "eval_steps_per_second": 0.946, |
| "step": 782 |
| }, |
| { |
| "epoch": 1.28, |
| "learning_rate": 4.680306905370844e-05, |
| "loss": 0.3075, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_accuracy": 0.8962, |
| "eval_loss": 0.2538779377937317, |
| "eval_runtime": 102.8426, |
| "eval_samples_per_second": 243.09, |
| "eval_steps_per_second": 0.953, |
| "step": 1564 |
| }, |
| { |
| "epoch": 2.56, |
| "learning_rate": 4.360613810741688e-05, |
| "loss": 0.2171, |
| "step": 2000 |
| }, |
| { |
| "epoch": 3.0, |
| "eval_accuracy": 0.90308, |
| "eval_loss": 0.2649693489074707, |
| "eval_runtime": 102.9465, |
| "eval_samples_per_second": 242.844, |
| "eval_steps_per_second": 0.952, |
| "step": 2346 |
| }, |
| { |
| "epoch": 3.84, |
| "learning_rate": 4.040920716112532e-05, |
| "loss": 0.1697, |
| "step": 3000 |
| }, |
| { |
| "epoch": 4.0, |
| "eval_accuracy": 0.89728, |
| "eval_loss": 0.3427022695541382, |
| "eval_runtime": 103.0024, |
| "eval_samples_per_second": 242.713, |
| "eval_steps_per_second": 0.951, |
| "step": 3128 |
| }, |
| { |
| "epoch": 5.0, |
| "eval_accuracy": 0.90308, |
| "eval_loss": 0.3241328001022339, |
| "eval_runtime": 102.9184, |
| "eval_samples_per_second": 242.911, |
| "eval_steps_per_second": 0.952, |
| "step": 3910 |
| }, |
| { |
| "epoch": 5.12, |
| "learning_rate": 3.721227621483376e-05, |
| "loss": 0.1339, |
| "step": 4000 |
| }, |
| { |
| "epoch": 6.0, |
| "eval_accuracy": 0.90492, |
| "eval_loss": 0.41408097743988037, |
| "eval_runtime": 102.8464, |
| "eval_samples_per_second": 243.081, |
| "eval_steps_per_second": 0.953, |
| "step": 4692 |
| }, |
| { |
| "epoch": 6.39, |
| "learning_rate": 3.40153452685422e-05, |
| "loss": 0.1038, |
| "step": 5000 |
| }, |
| { |
| "epoch": 7.0, |
| "eval_accuracy": 0.8946, |
| "eval_loss": 0.45717746019363403, |
| "eval_runtime": 102.3334, |
| "eval_samples_per_second": 244.299, |
| "eval_steps_per_second": 0.958, |
| "step": 5474 |
| }, |
| { |
| "epoch": 7.67, |
| "learning_rate": 3.081841432225064e-05, |
| "loss": 0.0922, |
| "step": 6000 |
| }, |
| { |
| "epoch": 8.0, |
| "eval_accuracy": 0.9054, |
| "eval_loss": 0.4153657555580139, |
| "eval_runtime": 102.2979, |
| "eval_samples_per_second": 244.384, |
| "eval_steps_per_second": 0.958, |
| "step": 6256 |
| }, |
| { |
| "epoch": 8.95, |
| "learning_rate": 2.7621483375959077e-05, |
| "loss": 0.0676, |
| "step": 7000 |
| }, |
| { |
| "epoch": 9.0, |
| "eval_accuracy": 0.89824, |
| "eval_loss": 0.5019603371620178, |
| "eval_runtime": 102.1481, |
| "eval_samples_per_second": 244.743, |
| "eval_steps_per_second": 0.959, |
| "step": 7038 |
| }, |
| { |
| "epoch": 10.0, |
| "eval_accuracy": 0.90708, |
| "eval_loss": 0.507008969783783, |
| "eval_runtime": 102.4086, |
| "eval_samples_per_second": 244.12, |
| "eval_steps_per_second": 0.957, |
| "step": 7820 |
| }, |
| { |
| "epoch": 10.23, |
| "learning_rate": 2.442455242966752e-05, |
| "loss": 0.0568, |
| "step": 8000 |
| }, |
| { |
| "epoch": 11.0, |
| "eval_accuracy": 0.90668, |
| "eval_loss": 0.48255667090415955, |
| "eval_runtime": 102.2755, |
| "eval_samples_per_second": 244.438, |
| "eval_steps_per_second": 0.958, |
| "step": 8602 |
| }, |
| { |
| "epoch": 11.51, |
| "learning_rate": 2.122762148337596e-05, |
| "loss": 0.0443, |
| "step": 9000 |
| }, |
| { |
| "epoch": 12.0, |
| "eval_accuracy": 0.90856, |
| "eval_loss": 0.5103762745857239, |
| "eval_runtime": 102.4293, |
| "eval_samples_per_second": 244.071, |
| "eval_steps_per_second": 0.957, |
| "step": 9384 |
| }, |
| { |
| "epoch": 12.79, |
| "learning_rate": 1.80306905370844e-05, |
| "loss": 0.0313, |
| "step": 10000 |
| }, |
| { |
| "epoch": 13.0, |
| "eval_accuracy": 0.90884, |
| "eval_loss": 0.545563817024231, |
| "eval_runtime": 101.9339, |
| "eval_samples_per_second": 245.257, |
| "eval_steps_per_second": 0.961, |
| "step": 10166 |
| }, |
| { |
| "epoch": 14.0, |
| "eval_accuracy": 0.90776, |
| "eval_loss": 0.47395065426826477, |
| "eval_runtime": 102.2186, |
| "eval_samples_per_second": 244.574, |
| "eval_steps_per_second": 0.959, |
| "step": 10948 |
| }, |
| { |
| "epoch": 14.07, |
| "learning_rate": 1.483375959079284e-05, |
| "loss": 0.0245, |
| "step": 11000 |
| }, |
| { |
| "epoch": 15.0, |
| "eval_accuracy": 0.90712, |
| "eval_loss": 0.4977429211139679, |
| "eval_runtime": 102.2005, |
| "eval_samples_per_second": 244.617, |
| "eval_steps_per_second": 0.959, |
| "step": 11730 |
| }, |
| { |
| "epoch": 15.35, |
| "learning_rate": 1.163682864450128e-05, |
| "loss": 0.0227, |
| "step": 12000 |
| }, |
| { |
| "epoch": 16.0, |
| "eval_accuracy": 0.90984, |
| "eval_loss": 0.5135776400566101, |
| "eval_runtime": 102.5324, |
| "eval_samples_per_second": 243.825, |
| "eval_steps_per_second": 0.956, |
| "step": 12512 |
| }, |
| { |
| "epoch": 16.62, |
| "learning_rate": 8.439897698209718e-06, |
| "loss": 0.0175, |
| "step": 13000 |
| }, |
| { |
| "epoch": 17.0, |
| "eval_accuracy": 0.91076, |
| "eval_loss": 0.5130705833435059, |
| "eval_runtime": 102.0766, |
| "eval_samples_per_second": 244.914, |
| "eval_steps_per_second": 0.96, |
| "step": 13294 |
| }, |
| { |
| "epoch": 17.9, |
| "learning_rate": 5.242966751918159e-06, |
| "loss": 0.0173, |
| "step": 14000 |
| }, |
| { |
| "epoch": 18.0, |
| "eval_accuracy": 0.91092, |
| "eval_loss": 0.5370119214057922, |
| "eval_runtime": 102.316, |
| "eval_samples_per_second": 244.341, |
| "eval_steps_per_second": 0.958, |
| "step": 14076 |
| }, |
| { |
| "epoch": 19.0, |
| "eval_accuracy": 0.9126, |
| "eval_loss": 0.5343945026397705, |
| "eval_runtime": 102.4054, |
| "eval_samples_per_second": 244.128, |
| "eval_steps_per_second": 0.957, |
| "step": 14858 |
| } |
| ], |
| "logging_steps": 1000, |
| "max_steps": 15640, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 20, |
| "save_steps": 500, |
| "total_flos": 1.106668493952e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|