| { | |
| "best_metric": 1.9327729940414429, | |
| "best_model_checkpoint": "./results/checkpoint-322", | |
| "epoch": 22.0, | |
| "eval_steps": 500, | |
| "global_step": 7084, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.2501650165016502, | |
| "eval_loss": 1.9327729940414429, | |
| "eval_precision": 0.21039270770334143, | |
| "eval_recall": 0.2501650165016502, | |
| "eval_runtime": 105.6438, | |
| "eval_samples_per_second": 86.044, | |
| "eval_steps_per_second": 0.435, | |
| "step": 322 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 9.689440993788821e-05, | |
| "loss": 1.5264, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.342024202420242, | |
| "eval_loss": 2.632329225540161, | |
| "eval_precision": 0.35262421551132295, | |
| "eval_recall": 0.342024202420242, | |
| "eval_runtime": 103.0666, | |
| "eval_samples_per_second": 88.195, | |
| "eval_steps_per_second": 0.446, | |
| "step": 644 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.3278327832783278, | |
| "eval_loss": 2.9007134437561035, | |
| "eval_precision": 0.34602885917564774, | |
| "eval_recall": 0.3278327832783278, | |
| "eval_runtime": 102.8287, | |
| "eval_samples_per_second": 88.399, | |
| "eval_steps_per_second": 0.447, | |
| "step": 966 | |
| }, | |
| { | |
| "epoch": 3.11, | |
| "learning_rate": 9.37888198757764e-05, | |
| "loss": 0.6172, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.3423542354235424, | |
| "eval_loss": 3.5746607780456543, | |
| "eval_precision": 0.3512951146039761, | |
| "eval_recall": 0.3423542354235424, | |
| "eval_runtime": 101.7857, | |
| "eval_samples_per_second": 89.305, | |
| "eval_steps_per_second": 0.452, | |
| "step": 1288 | |
| }, | |
| { | |
| "epoch": 4.66, | |
| "learning_rate": 9.068322981366461e-05, | |
| "loss": 0.227, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.3298129812981298, | |
| "eval_loss": 4.564810276031494, | |
| "eval_precision": 0.3549699546948268, | |
| "eval_recall": 0.3298129812981298, | |
| "eval_runtime": 101.7692, | |
| "eval_samples_per_second": 89.32, | |
| "eval_steps_per_second": 0.452, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.3282728272827283, | |
| "eval_loss": 4.280569553375244, | |
| "eval_precision": 0.3536771477074885, | |
| "eval_recall": 0.3282728272827283, | |
| "eval_runtime": 103.0369, | |
| "eval_samples_per_second": 88.221, | |
| "eval_steps_per_second": 0.446, | |
| "step": 1932 | |
| }, | |
| { | |
| "epoch": 6.21, | |
| "learning_rate": 8.757763975155279e-05, | |
| "loss": 0.118, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.3460946094609461, | |
| "eval_loss": 4.623283386230469, | |
| "eval_precision": 0.344094580634609, | |
| "eval_recall": 0.3460946094609461, | |
| "eval_runtime": 89.9963, | |
| "eval_samples_per_second": 101.004, | |
| "eval_steps_per_second": 0.511, | |
| "step": 2254 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 8.4472049689441e-05, | |
| "loss": 0.0775, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.3316831683168317, | |
| "eval_loss": 4.871355056762695, | |
| "eval_precision": 0.3530027915318833, | |
| "eval_recall": 0.3316831683168317, | |
| "eval_runtime": 90.9139, | |
| "eval_samples_per_second": 99.985, | |
| "eval_steps_per_second": 0.506, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.34026402640264025, | |
| "eval_loss": 4.957677364349365, | |
| "eval_precision": 0.33301623037601086, | |
| "eval_recall": 0.34026402640264025, | |
| "eval_runtime": 100.8674, | |
| "eval_samples_per_second": 90.118, | |
| "eval_steps_per_second": 0.456, | |
| "step": 2898 | |
| }, | |
| { | |
| "epoch": 9.32, | |
| "learning_rate": 8.136645962732919e-05, | |
| "loss": 0.056, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.33641364136413643, | |
| "eval_loss": 5.228715419769287, | |
| "eval_precision": 0.35135134065339246, | |
| "eval_recall": 0.33641364136413643, | |
| "eval_runtime": 101.1595, | |
| "eval_samples_per_second": 89.858, | |
| "eval_steps_per_second": 0.455, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "learning_rate": 7.82608695652174e-05, | |
| "loss": 0.0418, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.3397139713971397, | |
| "eval_loss": 5.417286396026611, | |
| "eval_precision": 0.36541577660239133, | |
| "eval_recall": 0.3397139713971397, | |
| "eval_runtime": 100.0336, | |
| "eval_samples_per_second": 90.869, | |
| "eval_steps_per_second": 0.46, | |
| "step": 3542 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.3288228822882288, | |
| "eval_loss": 4.508035659790039, | |
| "eval_precision": 0.3497733496975655, | |
| "eval_recall": 0.3288228822882288, | |
| "eval_runtime": 98.9066, | |
| "eval_samples_per_second": 91.905, | |
| "eval_steps_per_second": 0.465, | |
| "step": 3864 | |
| }, | |
| { | |
| "epoch": 12.42, | |
| "learning_rate": 7.515527950310559e-05, | |
| "loss": 0.0329, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.33575357535753575, | |
| "eval_loss": 5.399806022644043, | |
| "eval_precision": 0.343976788298361, | |
| "eval_recall": 0.33575357535753575, | |
| "eval_runtime": 99.618, | |
| "eval_samples_per_second": 91.249, | |
| "eval_steps_per_second": 0.462, | |
| "step": 4186 | |
| }, | |
| { | |
| "epoch": 13.98, | |
| "learning_rate": 7.20496894409938e-05, | |
| "loss": 0.0255, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.3254125412541254, | |
| "eval_loss": 5.722477436065674, | |
| "eval_precision": 0.37649139562875183, | |
| "eval_recall": 0.3254125412541254, | |
| "eval_runtime": 99.1531, | |
| "eval_samples_per_second": 91.676, | |
| "eval_steps_per_second": 0.464, | |
| "step": 4508 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.33707370737073705, | |
| "eval_loss": 5.8459153175354, | |
| "eval_precision": 0.3366756679639873, | |
| "eval_recall": 0.33707370737073705, | |
| "eval_runtime": 100.8652, | |
| "eval_samples_per_second": 90.12, | |
| "eval_steps_per_second": 0.456, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 15.53, | |
| "learning_rate": 6.894409937888199e-05, | |
| "loss": 0.0243, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.35401540154015404, | |
| "eval_loss": 5.645482540130615, | |
| "eval_precision": 0.3631233149571772, | |
| "eval_recall": 0.35401540154015404, | |
| "eval_runtime": 99.6631, | |
| "eval_samples_per_second": 91.207, | |
| "eval_steps_per_second": 0.462, | |
| "step": 5152 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.3341034103410341, | |
| "eval_loss": 5.483065605163574, | |
| "eval_precision": 0.352613586631227, | |
| "eval_recall": 0.3341034103410341, | |
| "eval_runtime": 98.2468, | |
| "eval_samples_per_second": 92.522, | |
| "eval_steps_per_second": 0.468, | |
| "step": 5474 | |
| }, | |
| { | |
| "epoch": 17.08, | |
| "learning_rate": 6.58385093167702e-05, | |
| "loss": 0.0173, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.3547854785478548, | |
| "eval_loss": 5.973876476287842, | |
| "eval_precision": 0.37074034210656404, | |
| "eval_recall": 0.3547854785478548, | |
| "eval_runtime": 99.9918, | |
| "eval_samples_per_second": 90.907, | |
| "eval_steps_per_second": 0.46, | |
| "step": 5796 | |
| }, | |
| { | |
| "epoch": 18.63, | |
| "learning_rate": 6.273291925465838e-05, | |
| "loss": 0.017, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.3471947194719472, | |
| "eval_loss": 5.270116329193115, | |
| "eval_precision": 0.3540165595958133, | |
| "eval_recall": 0.3471947194719472, | |
| "eval_runtime": 98.3519, | |
| "eval_samples_per_second": 92.423, | |
| "eval_steps_per_second": 0.468, | |
| "step": 6118 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.34994499449944994, | |
| "eval_loss": 6.121872901916504, | |
| "eval_precision": 0.3608309273966228, | |
| "eval_recall": 0.34994499449944994, | |
| "eval_runtime": 98.8185, | |
| "eval_samples_per_second": 91.987, | |
| "eval_steps_per_second": 0.465, | |
| "step": 6440 | |
| }, | |
| { | |
| "epoch": 20.19, | |
| "learning_rate": 5.962732919254659e-05, | |
| "loss": 0.0152, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.3448844884488449, | |
| "eval_loss": 6.195789337158203, | |
| "eval_precision": 0.3597641027560754, | |
| "eval_recall": 0.3448844884488449, | |
| "eval_runtime": 98.2972, | |
| "eval_samples_per_second": 92.475, | |
| "eval_steps_per_second": 0.468, | |
| "step": 6762 | |
| }, | |
| { | |
| "epoch": 21.74, | |
| "learning_rate": 5.652173913043478e-05, | |
| "loss": 0.011, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.35687568756875687, | |
| "eval_loss": 5.533013820648193, | |
| "eval_precision": 0.3680252789908563, | |
| "eval_recall": 0.35687568756875687, | |
| "eval_runtime": 97.9019, | |
| "eval_samples_per_second": 92.848, | |
| "eval_steps_per_second": 0.47, | |
| "step": 7084 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 16100, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 50, | |
| "save_steps": 500, | |
| "total_flos": 3.727239090605691e+17, | |
| "train_batch_size": 200, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |