| { |
| "best_metric": 0.000347244058502838, |
| "best_model_checkpoint": "./models/results_the_naughtyformer_453/checkpoint-200", |
| "epoch": 0.18507807981492191, |
| "global_step": 200, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.01, |
| "learning_rate": 4.000000000000001e-06, |
| "loss": 0.6633, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.01, |
| "learning_rate": 8.000000000000001e-06, |
| "loss": 0.5983, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.02, |
| "learning_rate": 1.2e-05, |
| "loss": 0.4739, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.02, |
| "eval_accuracy": 0.8941295546558704, |
| "eval_f1": 0.9441060168857539, |
| "eval_loss": 0.3223942816257477, |
| "eval_precision": 0.8941295546558704, |
| "eval_recall": 1.0, |
| "eval_runtime": 45.8231, |
| "eval_samples_per_second": 215.612, |
| "eval_steps_per_second": 26.951, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.03, |
| "learning_rate": 1.6000000000000003e-05, |
| "loss": 0.252, |
| "step": 32 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2e-05, |
| "loss": 0.1025, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.04, |
| "learning_rate": 2.4e-05, |
| "loss": 0.0265, |
| "step": 48 |
| }, |
| { |
| "epoch": 0.05, |
| "eval_accuracy": 0.9998987854251012, |
| "eval_f1": 0.9999434037013979, |
| "eval_loss": 0.0012769288150593638, |
| "eval_precision": 0.9998868138087154, |
| "eval_recall": 1.0, |
| "eval_runtime": 46.1333, |
| "eval_samples_per_second": 214.162, |
| "eval_steps_per_second": 26.77, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.05, |
| "learning_rate": 2.8000000000000003e-05, |
| "loss": 0.0017, |
| "step": 56 |
| }, |
| { |
| "epoch": 0.06, |
| "learning_rate": 3.2000000000000005e-05, |
| "loss": 0.0006, |
| "step": 64 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 3.6e-05, |
| "loss": 0.0003, |
| "step": 72 |
| }, |
| { |
| "epoch": 0.07, |
| "eval_accuracy": 0.9993927125506072, |
| "eval_f1": 0.9996602876231457, |
| "eval_loss": 0.003010607324540615, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9993208059769074, |
| "eval_runtime": 44.7372, |
| "eval_samples_per_second": 220.845, |
| "eval_steps_per_second": 27.606, |
| "step": 75 |
| }, |
| { |
| "epoch": 0.07, |
| "learning_rate": 4e-05, |
| "loss": 0.0003, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.08, |
| "learning_rate": 4.4000000000000006e-05, |
| "loss": 0.0002, |
| "step": 88 |
| }, |
| { |
| "epoch": 0.09, |
| "learning_rate": 4.8e-05, |
| "loss": 0.0001, |
| "step": 96 |
| }, |
| { |
| "epoch": 0.09, |
| "eval_accuracy": 0.9997975708502024, |
| "eval_f1": 0.9998867881806861, |
| "eval_loss": 0.0006089358939789236, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9997736019923025, |
| "eval_runtime": 44.1013, |
| "eval_samples_per_second": 224.03, |
| "eval_steps_per_second": 28.004, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.979591836734694e-05, |
| "loss": 0.0001, |
| "step": 104 |
| }, |
| { |
| "epoch": 0.1, |
| "learning_rate": 4.938775510204082e-05, |
| "loss": 0.0005, |
| "step": 112 |
| }, |
| { |
| "epoch": 0.11, |
| "learning_rate": 4.903061224489796e-05, |
| "loss": 0.017, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.12, |
| "eval_accuracy": 0.9991902834008097, |
| "eval_f1": 0.9995469988674972, |
| "eval_loss": 0.0038844814989715815, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9990944079692099, |
| "eval_runtime": 46.275, |
| "eval_samples_per_second": 213.506, |
| "eval_steps_per_second": 26.688, |
| "step": 125 |
| }, |
| { |
| "epoch": 0.12, |
| "learning_rate": 4.862244897959184e-05, |
| "loss": 0.0001, |
| "step": 128 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.8214285714285716e-05, |
| "loss": 0.0007, |
| "step": 136 |
| }, |
| { |
| "epoch": 0.13, |
| "learning_rate": 4.7806122448979595e-05, |
| "loss": 0.0072, |
| "step": 144 |
| }, |
| { |
| "epoch": 0.14, |
| "eval_accuracy": 0.9974696356275303, |
| "eval_f1": 0.998583007425041, |
| "eval_loss": 0.01291586086153984, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9971700249037808, |
| "eval_runtime": 45.6854, |
| "eval_samples_per_second": 216.262, |
| "eval_steps_per_second": 27.033, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.14, |
| "learning_rate": 4.7397959183673474e-05, |
| "loss": 0.0001, |
| "step": 152 |
| }, |
| { |
| "epoch": 0.15, |
| "learning_rate": 4.7040816326530614e-05, |
| "loss": 0.0184, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.663265306122449e-05, |
| "loss": 0.0464, |
| "step": 168 |
| }, |
| { |
| "epoch": 0.16, |
| "eval_accuracy": 0.9984817813765182, |
| "eval_f1": 0.9991502860703564, |
| "eval_loss": 0.005534218158572912, |
| "eval_precision": 1.0, |
| "eval_recall": 0.9983020149422686, |
| "eval_runtime": 46.9808, |
| "eval_samples_per_second": 210.299, |
| "eval_steps_per_second": 26.287, |
| "step": 175 |
| }, |
| { |
| "epoch": 0.16, |
| "learning_rate": 4.6224489795918366e-05, |
| "loss": 0.0097, |
| "step": 176 |
| }, |
| { |
| "epoch": 0.17, |
| "learning_rate": 4.5816326530612245e-05, |
| "loss": 0.0005, |
| "step": 184 |
| }, |
| { |
| "epoch": 0.18, |
| "learning_rate": 4.5408163265306124e-05, |
| "loss": 0.0002, |
| "step": 192 |
| }, |
| { |
| "epoch": 0.19, |
| "learning_rate": 4.5e-05, |
| "loss": 0.0001, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.19, |
| "eval_accuracy": 0.9997975708502024, |
| "eval_f1": 0.9998868009961512, |
| "eval_loss": 0.000347244058502838, |
| "eval_precision": 0.9998868009961512, |
| "eval_recall": 0.9998868009961512, |
| "eval_runtime": 51.238, |
| "eval_samples_per_second": 192.825, |
| "eval_steps_per_second": 24.103, |
| "step": 200 |
| } |
| ], |
| "max_steps": 1080, |
| "num_train_epochs": 1, |
| "total_flos": 3367821508608000.0, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|