{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 37.0,
  "global_step": 999,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.2262943855309169e-05,
      "loss": 3.3787,
      "step": 27
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.5104135133498819,
      "eval_loss": 3.0533134937286377,
      "eval_runtime": 7.7526,
      "eval_samples_per_second": 57.271,
      "eval_steps_per_second": 0.258,
      "step": 27
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.4841962570206113e-05,
      "loss": 3.0423,
      "step": 54
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.5218076561694905,
      "eval_loss": 2.9270966053009033,
      "eval_runtime": 7.1668,
      "eval_samples_per_second": 61.953,
      "eval_steps_per_second": 0.279,
      "step": 54
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.6350591807078892e-05,
      "loss": 2.8826,
      "step": 81
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.5349013657056145,
      "eval_loss": 2.826730489730835,
      "eval_runtime": 7.1613,
      "eval_samples_per_second": 62.0,
      "eval_steps_per_second": 0.279,
      "step": 81
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.7420981285103056e-05,
      "loss": 2.7528,
      "step": 108
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.5677539663746152,
      "eval_loss": 2.5704185962677,
      "eval_runtime": 7.1293,
      "eval_samples_per_second": 62.279,
      "eval_steps_per_second": 0.281,
      "step": 108
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.825123986666868e-05,
      "loss": 2.676,
      "step": 135
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.5691213903273941,
      "eval_loss": 2.544525384902954,
      "eval_runtime": 7.2153,
      "eval_samples_per_second": 61.536,
      "eval_steps_per_second": 0.277,
      "step": 135
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.892961052197583e-05,
      "loss": 2.6468,
      "step": 162
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.5640251055842328,
      "eval_loss": 2.552178144454956,
      "eval_runtime": 7.1679,
      "eval_samples_per_second": 61.943,
      "eval_steps_per_second": 0.279,
      "step": 162
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.9503164738653782e-05,
      "loss": 2.5425,
      "step": 189
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.592901878914405,
      "eval_loss": 2.344503879547119,
      "eval_runtime": 7.1022,
      "eval_samples_per_second": 62.516,
      "eval_steps_per_second": 0.282,
      "step": 189
    },
    {
      "epoch": 8.0,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 2.4507,
      "step": 216
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.6077949332933593,
      "eval_loss": 2.190293788909912,
      "eval_runtime": 7.1171,
      "eval_samples_per_second": 62.385,
      "eval_steps_per_second": 0.281,
      "step": 216
    },
    {
      "epoch": 9.0,
      "learning_rate": 2e-05,
      "loss": 2.3779,
      "step": 243
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.6202360876897133,
      "eval_loss": 2.0489487648010254,
      "eval_runtime": 7.164,
      "eval_samples_per_second": 61.977,
      "eval_steps_per_second": 0.279,
      "step": 243
    },
    {
      "epoch": 10.0,
      "learning_rate": 2e-05,
      "loss": 2.2947,
      "step": 270
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.6456804315147228,
      "eval_loss": 1.8817191123962402,
      "eval_runtime": 7.1714,
      "eval_samples_per_second": 61.912,
      "eval_steps_per_second": 0.279,
      "step": 270
    },
    {
      "epoch": 11.0,
      "learning_rate": 2e-05,
      "loss": 2.1394,
      "step": 297
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.6489209509025051,
      "eval_loss": 1.842598795890808,
      "eval_runtime": 7.136,
      "eval_samples_per_second": 62.22,
      "eval_steps_per_second": 0.28,
      "step": 297
    },
    {
      "epoch": 12.0,
      "learning_rate": 2e-05,
      "loss": 2.0426,
      "step": 324
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.6790733111349398,
      "eval_loss": 1.6428455114364624,
      "eval_runtime": 7.107,
      "eval_samples_per_second": 62.474,
      "eval_steps_per_second": 0.281,
      "step": 324
    },
    {
      "epoch": 13.0,
      "learning_rate": 2e-05,
      "loss": 1.9533,
      "step": 351
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.690136927023769,
      "eval_loss": 1.5633041858673096,
      "eval_runtime": 7.193,
      "eval_samples_per_second": 61.726,
      "eval_steps_per_second": 0.278,
      "step": 351
    },
    {
      "epoch": 14.0,
      "learning_rate": 2e-05,
      "loss": 1.8598,
      "step": 378
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7024347351505412,
      "eval_loss": 1.4617172479629517,
      "eval_runtime": 7.1532,
      "eval_samples_per_second": 62.071,
      "eval_steps_per_second": 0.28,
      "step": 378
    },
    {
      "epoch": 15.0,
      "learning_rate": 2e-05,
      "loss": 1.7533,
      "step": 405
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7221761780724604,
      "eval_loss": 1.3566689491271973,
      "eval_runtime": 7.1795,
      "eval_samples_per_second": 61.843,
      "eval_steps_per_second": 0.279,
      "step": 405
    },
    {
      "epoch": 16.0,
      "learning_rate": 2e-05,
      "loss": 1.6829,
      "step": 432
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7226731441436104,
      "eval_loss": 1.3594402074813843,
      "eval_runtime": 7.1593,
      "eval_samples_per_second": 62.017,
      "eval_steps_per_second": 0.279,
      "step": 432
    },
    {
      "epoch": 17.0,
      "learning_rate": 2e-05,
      "loss": 1.6363,
      "step": 459
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7319418805454654,
      "eval_loss": 1.3049547672271729,
      "eval_runtime": 7.1172,
      "eval_samples_per_second": 62.384,
      "eval_steps_per_second": 0.281,
      "step": 459
    },
    {
      "epoch": 18.0,
      "learning_rate": 2e-05,
      "loss": 1.5438,
      "step": 486
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7309124489856713,
      "eval_loss": 1.3055365085601807,
      "eval_runtime": 7.1524,
      "eval_samples_per_second": 62.077,
      "eval_steps_per_second": 0.28,
      "step": 486
    },
    {
      "epoch": 19.0,
      "learning_rate": 2e-05,
      "loss": 1.5025,
      "step": 513
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7245466228759103,
      "eval_loss": 1.3259419202804565,
      "eval_runtime": 7.228,
      "eval_samples_per_second": 61.428,
      "eval_steps_per_second": 0.277,
      "step": 513
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 1.4319,
      "step": 540
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7438057920631648,
      "eval_loss": 1.2239311933517456,
      "eval_runtime": 7.1524,
      "eval_samples_per_second": 62.077,
      "eval_steps_per_second": 0.28,
      "step": 540
    },
    {
      "epoch": 21.0,
      "learning_rate": 2e-05,
      "loss": 1.3768,
      "step": 567
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7472951634598515,
      "eval_loss": 1.1993966102600098,
      "eval_runtime": 7.1565,
      "eval_samples_per_second": 62.042,
      "eval_steps_per_second": 0.279,
      "step": 567
    },
    {
      "epoch": 22.0,
      "learning_rate": 2e-05,
      "loss": 1.3384,
      "step": 594
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7496693916806925,
      "eval_loss": 1.1782174110412598,
      "eval_runtime": 7.2171,
      "eval_samples_per_second": 61.52,
      "eval_steps_per_second": 0.277,
      "step": 594
    },
    {
      "epoch": 23.0,
      "learning_rate": 2e-05,
      "loss": 1.308,
      "step": 621
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7524660728164047,
      "eval_loss": 1.1727790832519531,
      "eval_runtime": 7.1544,
      "eval_samples_per_second": 62.06,
      "eval_steps_per_second": 0.28,
      "step": 621
    },
    {
      "epoch": 24.0,
      "learning_rate": 2e-05,
      "loss": 1.3139,
      "step": 648
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7564876470222167,
      "eval_loss": 1.1401317119598389,
      "eval_runtime": 6.2932,
      "eval_samples_per_second": 70.552,
      "eval_steps_per_second": 0.318,
      "step": 648
    },
    {
      "epoch": 25.0,
      "learning_rate": 2e-05,
      "loss": 1.2701,
      "step": 675
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7505940307157346,
      "eval_loss": 1.1718164682388306,
      "eval_runtime": 7.2194,
      "eval_samples_per_second": 61.501,
      "eval_steps_per_second": 0.277,
      "step": 675
    },
    {
      "epoch": 26.0,
      "learning_rate": 2e-05,
      "loss": 1.2614,
      "step": 702
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.7606208191526816,
      "eval_loss": 1.111540675163269,
      "eval_runtime": 7.1423,
      "eval_samples_per_second": 62.165,
      "eval_steps_per_second": 0.28,
      "step": 702
    },
    {
      "epoch": 27.0,
      "learning_rate": 2e-05,
      "loss": 1.2549,
      "step": 729
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7517491504126567,
      "eval_loss": 1.1640406847000122,
      "eval_runtime": 7.113,
      "eval_samples_per_second": 62.421,
      "eval_steps_per_second": 0.281,
      "step": 729
    },
    {
      "epoch": 28.0,
      "learning_rate": 2e-05,
      "loss": 1.2287,
      "step": 756
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7552987090963674,
      "eval_loss": 1.1474734544754028,
      "eval_runtime": 7.171,
      "eval_samples_per_second": 61.916,
      "eval_steps_per_second": 0.279,
      "step": 756
    },
    {
      "epoch": 29.0,
      "learning_rate": 2e-05,
      "loss": 1.1967,
      "step": 783
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7647311090144953,
      "eval_loss": 1.0948566198349,
      "eval_runtime": 7.1733,
      "eval_samples_per_second": 61.896,
      "eval_steps_per_second": 0.279,
      "step": 783
    },
    {
      "epoch": 30.0,
      "learning_rate": 2e-05,
      "loss": 1.1938,
      "step": 810
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.7628133294013565,
      "eval_loss": 1.104235291481018,
      "eval_runtime": 7.2106,
      "eval_samples_per_second": 61.576,
      "eval_steps_per_second": 0.277,
      "step": 810
    },
    {
      "epoch": 31.0,
      "learning_rate": 2e-05,
      "loss": 1.1831,
      "step": 837
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7568993506493507,
      "eval_loss": 1.1557021141052246,
      "eval_runtime": 7.147,
      "eval_samples_per_second": 62.124,
      "eval_steps_per_second": 0.28,
      "step": 837
    },
    {
      "epoch": 32.0,
      "learning_rate": 2e-05,
      "loss": 1.1783,
      "step": 864
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7663818512012861,
      "eval_loss": 1.0878251791000366,
      "eval_runtime": 7.2016,
      "eval_samples_per_second": 61.653,
      "eval_steps_per_second": 0.278,
      "step": 864
    },
    {
      "epoch": 33.0,
      "learning_rate": 2e-05,
      "loss": 1.1571,
      "step": 891
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7626662971175167,
      "eval_loss": 1.1019645929336548,
      "eval_runtime": 7.115,
      "eval_samples_per_second": 62.403,
      "eval_steps_per_second": 0.281,
      "step": 891
    },
    {
      "epoch": 34.0,
      "learning_rate": 2e-05,
      "loss": 1.1511,
      "step": 918
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7707477642809286,
      "eval_loss": 1.0570372343063354,
      "eval_runtime": 7.1537,
      "eval_samples_per_second": 62.066,
      "eval_steps_per_second": 0.28,
      "step": 918
    },
    {
      "epoch": 35.0,
      "learning_rate": 2e-05,
      "loss": 1.1332,
      "step": 945
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.7779759669545625,
      "eval_loss": 1.0286684036254883,
      "eval_runtime": 7.1266,
      "eval_samples_per_second": 62.301,
      "eval_steps_per_second": 0.281,
      "step": 945
    },
    {
      "epoch": 36.0,
      "learning_rate": 2e-05,
      "loss": 1.1343,
      "step": 972
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.7709718033554885,
      "eval_loss": 1.0590564012527466,
      "eval_runtime": 7.1932,
      "eval_samples_per_second": 61.725,
      "eval_steps_per_second": 0.278,
      "step": 972
    },
    {
      "epoch": 37.0,
      "learning_rate": 2e-05,
      "loss": 1.1164,
      "step": 999
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.7806313208703647,
      "eval_loss": 1.0081170797348022,
      "eval_runtime": 7.1199,
      "eval_samples_per_second": 62.36,
      "eval_steps_per_second": 0.281,
      "step": 999
    }
  ],
  "max_steps": 1080,
  "num_train_epochs": 40,
  "total_flos": 118496962805760.0,
  "trial_name": null,
  "trial_params": null
}