{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 35.0,
  "global_step": 33600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.535132032339264e-05,
      "loss": 2.3734,
      "step": 960
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.6711076941043658,
      "eval_loss": 1.6856393814086914,
      "eval_runtime": 42.9945,
      "eval_samples_per_second": 176.86,
      "eval_steps_per_second": 1.116,
      "step": 960
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.6900880215595094e-05,
      "loss": 1.5002,
      "step": 1920
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6930295365255809,
      "eval_loss": 1.5316802263259888,
      "eval_runtime": 40.2605,
      "eval_samples_per_second": 188.87,
      "eval_steps_per_second": 1.192,
      "step": 1920
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.7807314645155048e-05,
      "loss": 1.3682,
      "step": 2880
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7001060409279067,
      "eval_loss": 1.4794470071792603,
      "eval_runtime": 40.3426,
      "eval_samples_per_second": 188.486,
      "eval_steps_per_second": 1.19,
      "step": 2880
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.8450440107797548e-05,
      "loss": 1.3057,
      "step": 3840
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7058089394925496,
      "eval_loss": 1.445176362991333,
      "eval_runtime": 40.2825,
      "eval_samples_per_second": 188.767,
      "eval_steps_per_second": 1.192,
      "step": 3840
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.894928697180815e-05,
      "loss": 1.2652,
      "step": 4800
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7078205742901283,
      "eval_loss": 1.4240751266479492,
      "eval_runtime": 44.133,
      "eval_samples_per_second": 172.297,
      "eval_steps_per_second": 1.088,
      "step": 4800
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.93568745373575e-05,
      "loss": 1.2347,
      "step": 5760
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.712788831055115,
      "eval_loss": 1.393662452697754,
      "eval_runtime": 40.438,
      "eval_samples_per_second": 188.041,
      "eval_steps_per_second": 1.187,
      "step": 5760
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.9701484913790247e-05,
      "loss": 1.2117,
      "step": 6720
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7158340831339961,
      "eval_loss": 1.3783458471298218,
      "eval_runtime": 47.0316,
      "eval_samples_per_second": 161.679,
      "eval_steps_per_second": 1.021,
      "step": 6720
    },
    {
      "epoch": 8.0,
      "learning_rate": 2e-05,
      "loss": 1.1863,
      "step": 7680
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7177737277852768,
      "eval_loss": 1.356780767440796,
      "eval_runtime": 40.2772,
      "eval_samples_per_second": 188.791,
      "eval_steps_per_second": 1.192,
      "step": 7680
    },
    {
      "epoch": 9.0,
      "learning_rate": 2e-05,
      "loss": 1.167,
      "step": 8640
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7172315754804711,
      "eval_loss": 1.362362265586853,
      "eval_runtime": 40.3662,
      "eval_samples_per_second": 188.375,
      "eval_steps_per_second": 1.189,
      "step": 8640
    },
    {
      "epoch": 10.0,
      "learning_rate": 2e-05,
      "loss": 1.1528,
      "step": 9600
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7208890541082557,
      "eval_loss": 1.3375591039657593,
      "eval_runtime": 40.4301,
      "eval_samples_per_second": 188.077,
      "eval_steps_per_second": 1.187,
      "step": 9600
    },
    {
      "epoch": 11.0,
      "learning_rate": 2e-05,
      "loss": 1.1403,
      "step": 10560
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.722637380785269,
      "eval_loss": 1.3316693305969238,
      "eval_runtime": 40.2778,
      "eval_samples_per_second": 188.789,
      "eval_steps_per_second": 1.192,
      "step": 10560
    },
    {
      "epoch": 12.0,
      "learning_rate": 2e-05,
      "loss": 1.1276,
      "step": 11520
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7243298395325108,
      "eval_loss": 1.3127739429473877,
      "eval_runtime": 40.1949,
      "eval_samples_per_second": 189.178,
      "eval_steps_per_second": 1.194,
      "step": 11520
    },
    {
      "epoch": 13.0,
      "learning_rate": 2e-05,
      "loss": 1.1176,
      "step": 12480
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7242034818873964,
      "eval_loss": 1.3149378299713135,
      "eval_runtime": 40.5071,
      "eval_samples_per_second": 187.72,
      "eval_steps_per_second": 1.185,
      "step": 12480
    },
    {
      "epoch": 14.0,
      "learning_rate": 2e-05,
      "loss": 1.1061,
      "step": 13440
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7251281499452183,
      "eval_loss": 1.3011534214019775,
      "eval_runtime": 41.5767,
      "eval_samples_per_second": 182.891,
      "eval_steps_per_second": 1.154,
      "step": 13440
    },
    {
      "epoch": 15.0,
      "learning_rate": 2e-05,
      "loss": 1.0953,
      "step": 14400
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7274233697774307,
      "eval_loss": 1.2953981161117554,
      "eval_runtime": 40.2998,
      "eval_samples_per_second": 188.686,
      "eval_steps_per_second": 1.191,
      "step": 14400
    },
    {
      "epoch": 16.0,
      "learning_rate": 2e-05,
      "loss": 1.0872,
      "step": 15360
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.729807233922918,
      "eval_loss": 1.2836934328079224,
      "eval_runtime": 40.3358,
      "eval_samples_per_second": 188.517,
      "eval_steps_per_second": 1.19,
      "step": 15360
    },
    {
      "epoch": 17.0,
      "learning_rate": 2e-05,
      "loss": 1.0778,
      "step": 16320
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.728892083446406,
      "eval_loss": 1.2819887399673462,
      "eval_runtime": 40.2817,
      "eval_samples_per_second": 188.77,
      "eval_steps_per_second": 1.192,
      "step": 16320
    },
    {
      "epoch": 18.0,
      "learning_rate": 2e-05,
      "loss": 1.0709,
      "step": 17280
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7314453291503982,
      "eval_loss": 1.2700670957565308,
      "eval_runtime": 40.2932,
      "eval_samples_per_second": 188.717,
      "eval_steps_per_second": 1.191,
      "step": 17280
    },
    {
      "epoch": 19.0,
      "learning_rate": 2e-05,
      "loss": 1.0629,
      "step": 18240
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7313673197000715,
      "eval_loss": 1.2694642543792725,
      "eval_runtime": 40.2951,
      "eval_samples_per_second": 188.708,
      "eval_steps_per_second": 1.191,
      "step": 18240
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 1.0575,
      "step": 19200
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7321188555482103,
      "eval_loss": 1.269392490386963,
      "eval_runtime": 40.9309,
      "eval_samples_per_second": 185.776,
      "eval_steps_per_second": 1.173,
      "step": 19200
    },
    {
      "epoch": 21.0,
      "learning_rate": 2e-05,
      "loss": 1.0494,
      "step": 20160
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7336233174474587,
      "eval_loss": 1.2491707801818848,
      "eval_runtime": 41.3244,
      "eval_samples_per_second": 184.007,
      "eval_steps_per_second": 1.162,
      "step": 20160
    },
    {
      "epoch": 22.0,
      "learning_rate": 2e-05,
      "loss": 1.0443,
      "step": 21120
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7333931208601605,
      "eval_loss": 1.2573738098144531,
      "eval_runtime": 40.4041,
      "eval_samples_per_second": 188.199,
      "eval_steps_per_second": 1.188,
      "step": 21120
    },
    {
      "epoch": 23.0,
      "learning_rate": 2e-05,
      "loss": 1.0375,
      "step": 22080
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7354173520930503,
      "eval_loss": 1.2430847883224487,
      "eval_runtime": 40.4071,
      "eval_samples_per_second": 188.185,
      "eval_steps_per_second": 1.188,
      "step": 22080
    },
    {
      "epoch": 24.0,
      "learning_rate": 2e-05,
      "loss": 1.0332,
      "step": 23040
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7351443165552266,
      "eval_loss": 1.240692377090454,
      "eval_runtime": 40.2279,
      "eval_samples_per_second": 189.023,
      "eval_steps_per_second": 1.193,
      "step": 23040
    },
    {
      "epoch": 25.0,
      "learning_rate": 2e-05,
      "loss": 1.0279,
      "step": 24000
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7350845404729578,
      "eval_loss": 1.2445788383483887,
      "eval_runtime": 40.3432,
      "eval_samples_per_second": 188.483,
      "eval_steps_per_second": 1.19,
      "step": 24000
    },
    {
      "epoch": 26.0,
      "learning_rate": 2e-05,
      "loss": 1.0233,
      "step": 24960
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.7361292168770468,
      "eval_loss": 1.2367281913757324,
      "eval_runtime": 40.3061,
      "eval_samples_per_second": 188.657,
      "eval_steps_per_second": 1.191,
      "step": 24960
    },
    {
      "epoch": 27.0,
      "learning_rate": 2e-05,
      "loss": 1.018,
      "step": 25920
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7351869592986123,
      "eval_loss": 1.2435057163238525,
      "eval_runtime": 41.4116,
      "eval_samples_per_second": 183.62,
      "eval_steps_per_second": 1.159,
      "step": 25920
    },
    {
      "epoch": 28.0,
      "learning_rate": 2e-05,
      "loss": 1.0128,
      "step": 26880
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7379444038039502,
      "eval_loss": 1.2293747663497925,
      "eval_runtime": 42.7481,
      "eval_samples_per_second": 177.879,
      "eval_steps_per_second": 1.123,
      "step": 26880
    },
    {
      "epoch": 29.0,
      "learning_rate": 2e-05,
      "loss": 1.008,
      "step": 27840
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7381460506414618,
      "eval_loss": 1.224423885345459,
      "eval_runtime": 40.3086,
      "eval_samples_per_second": 188.645,
      "eval_steps_per_second": 1.191,
      "step": 27840
    },
    {
      "epoch": 30.0,
      "learning_rate": 2e-05,
      "loss": 1.0036,
      "step": 28800
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.7393030501323384,
      "eval_loss": 1.2178620100021362,
      "eval_runtime": 40.2497,
      "eval_samples_per_second": 188.921,
      "eval_steps_per_second": 1.193,
      "step": 28800
    },
    {
      "epoch": 31.0,
      "learning_rate": 2e-05,
      "loss": 0.9997,
      "step": 29760
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.7388722666381714,
      "eval_loss": 1.2249476909637451,
      "eval_runtime": 40.2623,
      "eval_samples_per_second": 188.862,
      "eval_steps_per_second": 1.192,
      "step": 29760
    },
    {
      "epoch": 32.0,
      "learning_rate": 2e-05,
      "loss": 0.9969,
      "step": 30720
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7389634850823794,
      "eval_loss": 1.2235573530197144,
      "eval_runtime": 40.3447,
      "eval_samples_per_second": 188.476,
      "eval_steps_per_second": 1.19,
      "step": 30720
    },
    {
      "epoch": 33.0,
      "learning_rate": 2e-05,
      "loss": 0.992,
      "step": 31680
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7388708814628272,
      "eval_loss": 1.217455506324768,
      "eval_runtime": 42.7026,
      "eval_samples_per_second": 178.069,
      "eval_steps_per_second": 1.124,
      "step": 31680
    },
    {
      "epoch": 34.0,
      "learning_rate": 2e-05,
      "loss": 0.988,
      "step": 32640
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7401730933519727,
      "eval_loss": 1.2093894481658936,
      "eval_runtime": 40.7008,
      "eval_samples_per_second": 186.827,
      "eval_steps_per_second": 1.179,
      "step": 32640
    },
    {
      "epoch": 35.0,
      "learning_rate": 2e-05,
      "loss": 0.9836,
      "step": 33600
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.7400202510644008,
      "eval_loss": 1.208998203277588,
      "eval_runtime": 40.6904,
      "eval_samples_per_second": 186.874,
      "eval_steps_per_second": 1.18,
      "step": 33600
    }
  ],
  "max_steps": 38400,
  "num_train_epochs": 40,
  "total_flos": 2041828249436160.0,
  "trial_name": null,
  "trial_params": null
}