{
  "best_metric": 1.495205044746399,
  "best_model_checkpoint": "safety_instruct_detect/checkpoint-2556",
  "epoch": 10.0,
  "global_step": 12780,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.39,
      "learning_rate": 2.2455e-05,
      "loss": 1.5162,
      "step": 500
    },
    {
      "epoch": 0.78,
      "learning_rate": 4.4955000000000006e-05,
      "loss": 1.4983,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.4036969420356002,
      "eval_loss": 1.4984349012374878,
      "eval_runtime": 11.2442,
      "eval_samples_per_second": 389.712,
      "eval_steps_per_second": 48.736,
      "step": 1278
    },
    {
      "epoch": 1.17,
      "learning_rate": 4.309380305602717e-05,
      "loss": 1.4868,
      "step": 1500
    },
    {
      "epoch": 1.56,
      "learning_rate": 4.1183786078098476e-05,
      "loss": 1.4851,
      "step": 2000
    },
    {
      "epoch": 1.96,
      "learning_rate": 3.927376910016978e-05,
      "loss": 1.4886,
      "step": 2500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.4036969420356002,
      "eval_loss": 1.495205044746399,
      "eval_runtime": 11.4186,
      "eval_samples_per_second": 383.76,
      "eval_steps_per_second": 47.992,
      "step": 2556
    },
    {
      "epoch": 2.35,
      "learning_rate": 3.736757215619695e-05,
      "loss": 1.491,
      "step": 3000
    },
    {
      "epoch": 2.74,
      "learning_rate": 3.545755517826825e-05,
      "loss": 1.4828,
      "step": 3500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.4036969420356002,
      "eval_loss": 1.4983419179916382,
      "eval_runtime": 11.4982,
      "eval_samples_per_second": 381.103,
      "eval_steps_per_second": 47.66,
      "step": 3834
    },
    {
      "epoch": 3.13,
      "learning_rate": 3.354753820033956e-05,
      "loss": 1.4937,
      "step": 4000
    },
    {
      "epoch": 3.52,
      "learning_rate": 3.163752122241087e-05,
      "loss": 1.4751,
      "step": 4500
    },
    {
      "epoch": 3.91,
      "learning_rate": 2.9731324278438033e-05,
      "loss": 1.4853,
      "step": 5000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.4036969420356002,
      "eval_loss": 1.5341633558273315,
      "eval_runtime": 11.4113,
      "eval_samples_per_second": 384.006,
      "eval_steps_per_second": 48.023,
      "step": 5112
    },
    {
      "epoch": 4.3,
      "learning_rate": 2.782130730050934e-05,
      "loss": 1.4528,
      "step": 5500
    },
    {
      "epoch": 4.69,
      "learning_rate": 2.5911290322580646e-05,
      "loss": 1.4446,
      "step": 6000
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.39639434048379735,
      "eval_loss": 1.557020664215088,
      "eval_runtime": 11.9866,
      "eval_samples_per_second": 365.575,
      "eval_steps_per_second": 45.718,
      "step": 6390
    },
    {
      "epoch": 5.09,
      "learning_rate": 2.4001273344651956e-05,
      "loss": 1.4092,
      "step": 6500
    },
    {
      "epoch": 5.48,
      "learning_rate": 2.209125636672326e-05,
      "loss": 1.3309,
      "step": 7000
    },
    {
      "epoch": 5.87,
      "learning_rate": 2.018123938879457e-05,
      "loss": 1.3054,
      "step": 7500
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.36376083979917845,
      "eval_loss": 1.8100188970565796,
      "eval_runtime": 11.9522,
      "eval_samples_per_second": 366.627,
      "eval_steps_per_second": 45.849,
      "step": 7668
    },
    {
      "epoch": 6.26,
      "learning_rate": 1.8271222410865876e-05,
      "loss": 1.2194,
      "step": 8000
    },
    {
      "epoch": 6.65,
      "learning_rate": 1.6361205432937183e-05,
      "loss": 1.1677,
      "step": 8500
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.3265632131446828,
      "eval_loss": 2.0681562423706055,
      "eval_runtime": 12.1156,
      "eval_samples_per_second": 361.683,
      "eval_steps_per_second": 45.231,
      "step": 8946
    },
    {
      "epoch": 7.04,
      "learning_rate": 1.4455008488964348e-05,
      "loss": 1.1879,
      "step": 9000
    },
    {
      "epoch": 7.43,
      "learning_rate": 1.2548811544991511e-05,
      "loss": 1.0508,
      "step": 9500
    },
    {
      "epoch": 7.82,
      "learning_rate": 1.063879456706282e-05,
      "loss": 1.0861,
      "step": 10000
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.3014605203103606,
      "eval_loss": 2.157534599304199,
      "eval_runtime": 11.8194,
      "eval_samples_per_second": 370.747,
      "eval_steps_per_second": 46.365,
      "step": 10224
    },
    {
      "epoch": 8.22,
      "learning_rate": 8.728777589134126e-06,
      "loss": 1.0225,
      "step": 10500
    },
    {
      "epoch": 8.61,
      "learning_rate": 6.818760611205433e-06,
      "loss": 0.9752,
      "step": 11000
    },
    {
      "epoch": 9.0,
      "learning_rate": 4.912563667232598e-06,
      "loss": 0.9729,
      "step": 11500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.30785029666818803,
      "eval_loss": 2.388486385345459,
      "eval_runtime": 11.8239,
      "eval_samples_per_second": 370.605,
      "eval_steps_per_second": 46.347,
      "step": 11502
    },
    {
      "epoch": 9.39,
      "learning_rate": 3.002546689303905e-06,
      "loss": 0.908,
      "step": 12000
    },
    {
      "epoch": 9.78,
      "learning_rate": 1.0925297113752123e-06,
      "loss": 0.8921,
      "step": 12500
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.3023733455043359,
      "eval_loss": 2.518986701965332,
      "eval_runtime": 11.4293,
      "eval_samples_per_second": 383.399,
      "eval_steps_per_second": 47.947,
      "step": 12780
    }
  ],
  "max_steps": 12780,
  "num_train_epochs": 10,
  "total_flos": 418971279265446.0,
  "trial_name": null,
  "trial_params": null
}