| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 36.0, | |
| "global_step": 972, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.2262943855309169e-05, | |
| "loss": 2.057, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6772454727151596, | |
| "eval_loss": 1.7236038446426392, | |
| "eval_runtime": 13.5733, | |
| "eval_samples_per_second": 32.711, | |
| "eval_steps_per_second": 0.147, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.4841962570206113e-05, | |
| "loss": 1.7092, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6982139066622192, | |
| "eval_loss": 1.5525641441345215, | |
| "eval_runtime": 12.8991, | |
| "eval_samples_per_second": 34.421, | |
| "eval_steps_per_second": 0.155, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.6350591807078892e-05, | |
| "loss": 1.5646, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7100154712905392, | |
| "eval_loss": 1.452789068222046, | |
| "eval_runtime": 12.9607, | |
| "eval_samples_per_second": 34.258, | |
| "eval_steps_per_second": 0.154, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.7420981285103056e-05, | |
| "loss": 1.4688, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7290463925156171, | |
| "eval_loss": 1.3419641256332397, | |
| "eval_runtime": 12.9711, | |
| "eval_samples_per_second": 34.23, | |
| "eval_steps_per_second": 0.154, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.825123986666868e-05, | |
| "loss": 1.3785, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.74070063507858, | |
| "eval_loss": 1.2742513418197632, | |
| "eval_runtime": 12.944, | |
| "eval_samples_per_second": 34.302, | |
| "eval_steps_per_second": 0.155, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.892961052197583e-05, | |
| "loss": 1.3459, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.739266862170088, | |
| "eval_loss": 1.2691913843154907, | |
| "eval_runtime": 12.9659, | |
| "eval_samples_per_second": 34.244, | |
| "eval_steps_per_second": 0.154, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.9503164738653782e-05, | |
| "loss": 1.3059, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7480331638828371, | |
| "eval_loss": 1.2231497764587402, | |
| "eval_runtime": 12.921, | |
| "eval_samples_per_second": 34.363, | |
| "eval_steps_per_second": 0.155, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 1.9999999999999998e-05, | |
| "loss": 1.2666, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7594158570229099, | |
| "eval_loss": 1.151406168937683, | |
| "eval_runtime": 12.9461, | |
| "eval_samples_per_second": 34.296, | |
| "eval_steps_per_second": 0.154, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2463, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7459728430463017, | |
| "eval_loss": 1.2034211158752441, | |
| "eval_runtime": 13.0359, | |
| "eval_samples_per_second": 34.06, | |
| "eval_steps_per_second": 0.153, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2276, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7586074755335456, | |
| "eval_loss": 1.1566089391708374, | |
| "eval_runtime": 12.9245, | |
| "eval_samples_per_second": 34.353, | |
| "eval_steps_per_second": 0.155, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.189, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7611567732115677, | |
| "eval_loss": 1.1319142580032349, | |
| "eval_runtime": 12.9698, | |
| "eval_samples_per_second": 34.233, | |
| "eval_steps_per_second": 0.154, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1568, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7545374996471618, | |
| "eval_loss": 1.1536731719970703, | |
| "eval_runtime": 12.9366, | |
| "eval_samples_per_second": 34.321, | |
| "eval_steps_per_second": 0.155, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1447, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.7683153013910355, | |
| "eval_loss": 1.0927276611328125, | |
| "eval_runtime": 12.9899, | |
| "eval_samples_per_second": 34.18, | |
| "eval_steps_per_second": 0.154, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1262, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7699539058709365, | |
| "eval_loss": 1.0704097747802734, | |
| "eval_runtime": 12.9791, | |
| "eval_samples_per_second": 34.209, | |
| "eval_steps_per_second": 0.154, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1173, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.774438983954053, | |
| "eval_loss": 1.0296632051467896, | |
| "eval_runtime": 12.9361, | |
| "eval_samples_per_second": 34.323, | |
| "eval_steps_per_second": 0.155, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0997, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7737643712219984, | |
| "eval_loss": 1.0550481081008911, | |
| "eval_runtime": 12.9715, | |
| "eval_samples_per_second": 34.229, | |
| "eval_steps_per_second": 0.154, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0962, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7746747253401614, | |
| "eval_loss": 1.0655121803283691, | |
| "eval_runtime": 12.9954, | |
| "eval_samples_per_second": 34.166, | |
| "eval_steps_per_second": 0.154, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0864, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7735955893309492, | |
| "eval_loss": 1.0611152648925781, | |
| "eval_runtime": 12.9386, | |
| "eval_samples_per_second": 34.316, | |
| "eval_steps_per_second": 0.155, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0817, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.7697911607576493, | |
| "eval_loss": 1.0739043951034546, | |
| "eval_runtime": 13.0147, | |
| "eval_samples_per_second": 34.115, | |
| "eval_steps_per_second": 0.154, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0615, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7779283925151024, | |
| "eval_loss": 1.0259206295013428, | |
| "eval_runtime": 12.248, | |
| "eval_samples_per_second": 36.251, | |
| "eval_steps_per_second": 0.163, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0337, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.7820505322259913, | |
| "eval_loss": 1.0050867795944214, | |
| "eval_runtime": 13.0151, | |
| "eval_samples_per_second": 34.114, | |
| "eval_steps_per_second": 0.154, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0248, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.7823669101512009, | |
| "eval_loss": 0.9815566539764404, | |
| "eval_runtime": 12.966, | |
| "eval_samples_per_second": 34.243, | |
| "eval_steps_per_second": 0.154, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0078, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.7890613318979696, | |
| "eval_loss": 0.9701399207115173, | |
| "eval_runtime": 12.9372, | |
| "eval_samples_per_second": 34.32, | |
| "eval_steps_per_second": 0.155, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0161, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7868592237542407, | |
| "eval_loss": 0.9783701300621033, | |
| "eval_runtime": 12.9845, | |
| "eval_samples_per_second": 34.195, | |
| "eval_steps_per_second": 0.154, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0005, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.782170183167169, | |
| "eval_loss": 0.9962915182113647, | |
| "eval_runtime": 13.0152, | |
| "eval_samples_per_second": 34.114, | |
| "eval_steps_per_second": 0.154, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0008, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.790520909757887, | |
| "eval_loss": 0.9529848694801331, | |
| "eval_runtime": 13.0266, | |
| "eval_samples_per_second": 34.084, | |
| "eval_steps_per_second": 0.154, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9961, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.7787046824557895, | |
| "eval_loss": 1.0195859670639038, | |
| "eval_runtime": 12.9955, | |
| "eval_samples_per_second": 34.166, | |
| "eval_steps_per_second": 0.154, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9834, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7917842772205873, | |
| "eval_loss": 0.9555456638336182, | |
| "eval_runtime": 12.2326, | |
| "eval_samples_per_second": 36.296, | |
| "eval_steps_per_second": 0.163, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9647, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.7914557776443338, | |
| "eval_loss": 0.9375360608100891, | |
| "eval_runtime": 13.0096, | |
| "eval_samples_per_second": 34.129, | |
| "eval_steps_per_second": 0.154, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.967, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.793787977110495, | |
| "eval_loss": 0.9494355320930481, | |
| "eval_runtime": 13.0239, | |
| "eval_samples_per_second": 34.091, | |
| "eval_steps_per_second": 0.154, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9625, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.7860187306097597, | |
| "eval_loss": 0.9812522530555725, | |
| "eval_runtime": 12.989, | |
| "eval_samples_per_second": 34.183, | |
| "eval_steps_per_second": 0.154, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9578, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7956582591346297, | |
| "eval_loss": 0.9389752149581909, | |
| "eval_runtime": 13.0492, | |
| "eval_samples_per_second": 34.025, | |
| "eval_steps_per_second": 0.153, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9462, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.79146801197472, | |
| "eval_loss": 0.9519514441490173, | |
| "eval_runtime": 12.965, | |
| "eval_samples_per_second": 34.246, | |
| "eval_steps_per_second": 0.154, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9468, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.7949606757937664, | |
| "eval_loss": 0.922423243522644, | |
| "eval_runtime": 12.9466, | |
| "eval_samples_per_second": 34.295, | |
| "eval_steps_per_second": 0.154, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9357, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.8009954921111946, | |
| "eval_loss": 0.908001184463501, | |
| "eval_runtime": 12.9778, | |
| "eval_samples_per_second": 34.212, | |
| "eval_steps_per_second": 0.154, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9328, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7935578330893118, | |
| "eval_loss": 0.9237804412841797, | |
| "eval_runtime": 12.9467, | |
| "eval_samples_per_second": 34.294, | |
| "eval_steps_per_second": 0.154, | |
| "step": 972 | |
| } | |
| ], | |
| "max_steps": 1080, | |
| "num_train_epochs": 40, | |
| "total_flos": 302668861931520.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |