| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 37.0, | |
| "global_step": 999, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.2262943855309169e-05, | |
| "loss": 2.3623, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.638755980861244, | |
| "eval_loss": 2.000427007675171, | |
| "eval_runtime": 13.6799, | |
| "eval_samples_per_second": 32.456, | |
| "eval_steps_per_second": 0.146, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.4841962570206113e-05, | |
| "loss": 2.0119, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6597628650271401, | |
| "eval_loss": 1.8219548463821411, | |
| "eval_runtime": 13.3645, | |
| "eval_samples_per_second": 33.222, | |
| "eval_steps_per_second": 0.15, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.6350591807078892e-05, | |
| "loss": 1.8529, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6718626849986864, | |
| "eval_loss": 1.7023260593414307, | |
| "eval_runtime": 13.2825, | |
| "eval_samples_per_second": 33.427, | |
| "eval_steps_per_second": 0.151, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.7420981285103056e-05, | |
| "loss": 1.7397, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6914172365811054, | |
| "eval_loss": 1.5797321796417236, | |
| "eval_runtime": 13.2578, | |
| "eval_samples_per_second": 33.49, | |
| "eval_steps_per_second": 0.151, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.825123986666868e-05, | |
| "loss": 1.6316, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.704878691211332, | |
| "eval_loss": 1.4879175424575806, | |
| "eval_runtime": 13.2974, | |
| "eval_samples_per_second": 33.39, | |
| "eval_steps_per_second": 0.15, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.892961052197583e-05, | |
| "loss": 1.5906, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7069794721407625, | |
| "eval_loss": 1.4811629056930542, | |
| "eval_runtime": 13.1968, | |
| "eval_samples_per_second": 33.644, | |
| "eval_steps_per_second": 0.152, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.9503164738653782e-05, | |
| "loss": 1.5414, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7172597434035343, | |
| "eval_loss": 1.424712061882019, | |
| "eval_runtime": 13.0856, | |
| "eval_samples_per_second": 33.931, | |
| "eval_steps_per_second": 0.153, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 1.9999999999999998e-05, | |
| "loss": 1.4984, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.726310423413698, | |
| "eval_loss": 1.3407562971115112, | |
| "eval_runtime": 13.269, | |
| "eval_samples_per_second": 33.462, | |
| "eval_steps_per_second": 0.151, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.4759, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.714683309437463, | |
| "eval_loss": 1.409363865852356, | |
| "eval_runtime": 13.377, | |
| "eval_samples_per_second": 33.191, | |
| "eval_steps_per_second": 0.15, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.4544, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7280686239830209, | |
| "eval_loss": 1.338972806930542, | |
| "eval_runtime": 13.2791, | |
| "eval_samples_per_second": 33.436, | |
| "eval_steps_per_second": 0.151, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.4112, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7280365296803653, | |
| "eval_loss": 1.3258930444717407, | |
| "eval_runtime": 13.3211, | |
| "eval_samples_per_second": 33.331, | |
| "eval_steps_per_second": 0.15, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3741, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7261693058966325, | |
| "eval_loss": 1.3296260833740234, | |
| "eval_runtime": 13.335, | |
| "eval_samples_per_second": 33.296, | |
| "eval_steps_per_second": 0.15, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.363, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.737372488408037, | |
| "eval_loss": 1.2715643644332886, | |
| "eval_runtime": 13.2444, | |
| "eval_samples_per_second": 33.524, | |
| "eval_steps_per_second": 0.151, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3399, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7395378457059679, | |
| "eval_loss": 1.2554606199264526, | |
| "eval_runtime": 13.3293, | |
| "eval_samples_per_second": 33.31, | |
| "eval_steps_per_second": 0.15, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3276, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.7437385280359997, | |
| "eval_loss": 1.1987595558166504, | |
| "eval_runtime": 13.2943, | |
| "eval_samples_per_second": 33.398, | |
| "eval_steps_per_second": 0.15, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.314, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7433329382481925, | |
| "eval_loss": 1.2292665243148804, | |
| "eval_runtime": 13.3081, | |
| "eval_samples_per_second": 33.363, | |
| "eval_steps_per_second": 0.15, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3116, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7434126298865632, | |
| "eval_loss": 1.2456351518630981, | |
| "eval_runtime": 13.0321, | |
| "eval_samples_per_second": 34.07, | |
| "eval_steps_per_second": 0.153, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2997, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.740932796900611, | |
| "eval_loss": 1.2431975603103638, | |
| "eval_runtime": 13.2685, | |
| "eval_samples_per_second": 33.463, | |
| "eval_steps_per_second": 0.151, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2975, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.7378796103191155, | |
| "eval_loss": 1.2589606046676636, | |
| "eval_runtime": 13.2957, | |
| "eval_samples_per_second": 33.394, | |
| "eval_steps_per_second": 0.15, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.274, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7467216737881244, | |
| "eval_loss": 1.2022136449813843, | |
| "eval_runtime": 13.2148, | |
| "eval_samples_per_second": 33.599, | |
| "eval_steps_per_second": 0.151, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2447, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.7521932389753188, | |
| "eval_loss": 1.1791794300079346, | |
| "eval_runtime": 13.3942, | |
| "eval_samples_per_second": 33.149, | |
| "eval_steps_per_second": 0.149, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2338, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.749752006492921, | |
| "eval_loss": 1.1663883924484253, | |
| "eval_runtime": 13.2721, | |
| "eval_samples_per_second": 33.454, | |
| "eval_steps_per_second": 0.151, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2129, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.757722556143656, | |
| "eval_loss": 1.1311209201812744, | |
| "eval_runtime": 13.3266, | |
| "eval_samples_per_second": 33.317, | |
| "eval_steps_per_second": 0.15, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2233, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7568863020946808, | |
| "eval_loss": 1.140002727508545, | |
| "eval_runtime": 13.2772, | |
| "eval_samples_per_second": 33.441, | |
| "eval_steps_per_second": 0.151, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2072, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.7496811963830281, | |
| "eval_loss": 1.1746466159820557, | |
| "eval_runtime": 13.3397, | |
| "eval_samples_per_second": 33.284, | |
| "eval_steps_per_second": 0.15, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2086, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7594424064563463, | |
| "eval_loss": 1.1140285730361938, | |
| "eval_runtime": 13.2221, | |
| "eval_samples_per_second": 33.58, | |
| "eval_steps_per_second": 0.151, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1993, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.746650287118247, | |
| "eval_loss": 1.1945430040359497, | |
| "eval_runtime": 13.2631, | |
| "eval_samples_per_second": 33.476, | |
| "eval_steps_per_second": 0.151, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1866, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7611254579304546, | |
| "eval_loss": 1.116799235343933, | |
| "eval_runtime": 13.2563, | |
| "eval_samples_per_second": 33.494, | |
| "eval_steps_per_second": 0.151, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1658, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.7609447828200508, | |
| "eval_loss": 1.0980545282363892, | |
| "eval_runtime": 13.2601, | |
| "eval_samples_per_second": 33.484, | |
| "eval_steps_per_second": 0.151, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1682, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7663264704147248, | |
| "eval_loss": 1.1021370887756348, | |
| "eval_runtime": 13.3382, | |
| "eval_samples_per_second": 33.288, | |
| "eval_steps_per_second": 0.15, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1598, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.7555162515584679, | |
| "eval_loss": 1.1446928977966309, | |
| "eval_runtime": 13.2762, | |
| "eval_samples_per_second": 33.443, | |
| "eval_steps_per_second": 0.151, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1586, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7645990292129478, | |
| "eval_loss": 1.1078757047653198, | |
| "eval_runtime": 13.2153, | |
| "eval_samples_per_second": 33.597, | |
| "eval_steps_per_second": 0.151, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1457, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.7622519126288946, | |
| "eval_loss": 1.114358901977539, | |
| "eval_runtime": 13.3206, | |
| "eval_samples_per_second": 33.332, | |
| "eval_steps_per_second": 0.15, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1456, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.7681910865132537, | |
| "eval_loss": 1.0843584537506104, | |
| "eval_runtime": 13.3243, | |
| "eval_samples_per_second": 33.323, | |
| "eval_steps_per_second": 0.15, | |
| "step": 918 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1338, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.7738229401452542, | |
| "eval_loss": 1.075467586517334, | |
| "eval_runtime": 13.2446, | |
| "eval_samples_per_second": 33.523, | |
| "eval_steps_per_second": 0.151, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1335, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7666764275256223, | |
| "eval_loss": 1.0833895206451416, | |
| "eval_runtime": 13.2849, | |
| "eval_samples_per_second": 33.421, | |
| "eval_steps_per_second": 0.151, | |
| "step": 972 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1268, | |
| "step": 999 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.7761633253632518, | |
| "eval_loss": 1.0331711769104004, | |
| "eval_runtime": 13.3282, | |
| "eval_samples_per_second": 33.313, | |
| "eval_steps_per_second": 0.15, | |
| "step": 999 | |
| } | |
| ], | |
| "max_steps": 1080, | |
| "num_train_epochs": 40, | |
| "total_flos": 118496962805760.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |