| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.23573785950023574, |
| "eval_steps": 100, |
| "global_step": 1000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.023573785950023574, |
| "eval_accuracy": 0.6317716870477788, |
| "eval_loss": 1.8646224737167358, |
| "eval_runtime": 492.4929, |
| "eval_samples_per_second": 15.507, |
| "eval_steps_per_second": 0.485, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.04714757190004715, |
| "eval_accuracy": 0.7021049967171662, |
| "eval_loss": 1.4454373121261597, |
| "eval_runtime": 492.5577, |
| "eval_samples_per_second": 15.505, |
| "eval_steps_per_second": 0.485, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.07072135785007072, |
| "eval_accuracy": 0.7335578085807225, |
| "eval_loss": 1.2683207988739014, |
| "eval_runtime": 492.7056, |
| "eval_samples_per_second": 15.5, |
| "eval_steps_per_second": 0.485, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.0942951438000943, |
| "eval_accuracy": 0.7496352846832398, |
| "eval_loss": 1.1724069118499756, |
| "eval_runtime": 492.418, |
| "eval_samples_per_second": 15.509, |
| "eval_steps_per_second": 0.485, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.11786892975011787, |
| "grad_norm": 3.823148250579834, |
| "learning_rate": 4.803551783749804e-05, |
| "loss": 1.7013, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.11786892975011787, |
| "eval_accuracy": 0.7622726018519113, |
| "eval_loss": 1.1071853637695312, |
| "eval_runtime": 492.5647, |
| "eval_samples_per_second": 15.505, |
| "eval_steps_per_second": 0.485, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.14144271570014144, |
| "eval_accuracy": 0.7731863812800325, |
| "eval_loss": 1.0427114963531494, |
| "eval_runtime": 492.2995, |
| "eval_samples_per_second": 15.513, |
| "eval_steps_per_second": 0.485, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.16501650165016502, |
| "eval_accuracy": 0.7809848069202094, |
| "eval_loss": 1.003616452217102, |
| "eval_runtime": 492.5831, |
| "eval_samples_per_second": 15.504, |
| "eval_steps_per_second": 0.485, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.1885902876001886, |
| "eval_accuracy": 0.7868278015496704, |
| "eval_loss": 0.9764024615287781, |
| "eval_runtime": 492.4908, |
| "eval_samples_per_second": 15.507, |
| "eval_steps_per_second": 0.485, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.21216407355021216, |
| "eval_accuracy": 0.7921159020990164, |
| "eval_loss": 0.94410640001297, |
| "eval_runtime": 492.6567, |
| "eval_samples_per_second": 15.502, |
| "eval_steps_per_second": 0.485, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.23573785950023574, |
| "grad_norm": 2.0617034435272217, |
| "learning_rate": 4.607103567499607e-05, |
| "loss": 0.9942, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.23573785950023574, |
| "eval_accuracy": 0.7963057274181528, |
| "eval_loss": 0.9231188297271729, |
| "eval_runtime": 492.777, |
| "eval_samples_per_second": 15.498, |
| "eval_steps_per_second": 0.485, |
| "step": 1000 |
| } |
| ], |
| "logging_steps": 500, |
| "max_steps": 12726, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 200, |
| "total_flos": 7.41887283560448e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|