{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 35.0,
  "global_step": 33600,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.535132032339264e-05,
      "loss": 1.7652,
      "step": 960
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.66545764302116,
      "eval_loss": 1.7168171405792236,
      "eval_runtime": 51.9578,
      "eval_samples_per_second": 146.35,
      "eval_steps_per_second": 0.924,
      "step": 960
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.6900880215595094e-05,
      "loss": 1.5201,
      "step": 1920
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6816024828889072,
      "eval_loss": 1.6037945747375488,
      "eval_runtime": 51.6258,
      "eval_samples_per_second": 147.291,
      "eval_steps_per_second": 0.93,
      "step": 1920
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.7807314645155048e-05,
      "loss": 1.4366,
      "step": 2880
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.6859236059235067,
      "eval_loss": 1.5763635635375977,
      "eval_runtime": 52.5239,
      "eval_samples_per_second": 144.772,
      "eval_steps_per_second": 0.914,
      "step": 2880
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.8450440107797548e-05,
      "loss": 1.3831,
      "step": 3840
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.6914059105006866,
      "eval_loss": 1.5295616388320923,
      "eval_runtime": 51.6883,
      "eval_samples_per_second": 147.113,
      "eval_steps_per_second": 0.929,
      "step": 3840
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.894928697180815e-05,
      "loss": 1.3447,
      "step": 4800
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.6934798357287497,
      "eval_loss": 1.5127382278442383,
      "eval_runtime": 51.7137,
      "eval_samples_per_second": 147.04,
      "eval_steps_per_second": 0.928,
      "step": 4800
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.93568745373575e-05,
      "loss": 1.314,
      "step": 5760
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.6982260680110423,
      "eval_loss": 1.4813944101333618,
      "eval_runtime": 51.6818,
      "eval_samples_per_second": 147.131,
      "eval_steps_per_second": 0.929,
      "step": 5760
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.9701484913790247e-05,
      "loss": 1.29,
      "step": 6720
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.6991250528199647,
      "eval_loss": 1.4718950986862183,
      "eval_runtime": 52.9344,
      "eval_samples_per_second": 143.649,
      "eval_steps_per_second": 0.907,
      "step": 6720
    },
    {
      "epoch": 8.0,
      "learning_rate": 2e-05,
      "loss": 1.2669,
      "step": 7680
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7032825019384854,
      "eval_loss": 1.449450135231018,
      "eval_runtime": 50.9613,
      "eval_samples_per_second": 149.211,
      "eval_steps_per_second": 0.942,
      "step": 7680
    },
    {
      "epoch": 9.0,
      "learning_rate": 2e-05,
      "loss": 1.2461,
      "step": 8640
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7032601118345916,
      "eval_loss": 1.4466781616210938,
      "eval_runtime": 50.8922,
      "eval_samples_per_second": 149.414,
      "eval_steps_per_second": 0.943,
      "step": 8640
    },
    {
      "epoch": 10.0,
      "learning_rate": 2e-05,
      "loss": 1.2323,
      "step": 9600
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.7045017515092793,
      "eval_loss": 1.4342981576919556,
      "eval_runtime": 51.2867,
      "eval_samples_per_second": 148.264,
      "eval_steps_per_second": 0.936,
      "step": 9600
    },
    {
      "epoch": 11.0,
      "learning_rate": 2e-05,
      "loss": 1.2191,
      "step": 10560
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7057310891893632,
      "eval_loss": 1.4175918102264404,
      "eval_runtime": 51.4742,
      "eval_samples_per_second": 147.725,
      "eval_steps_per_second": 0.933,
      "step": 10560
    },
    {
      "epoch": 12.0,
      "learning_rate": 2e-05,
      "loss": 1.2046,
      "step": 11520
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7111463874637353,
      "eval_loss": 1.3918827772140503,
      "eval_runtime": 51.2424,
      "eval_samples_per_second": 148.393,
      "eval_steps_per_second": 0.937,
      "step": 11520
    },
    {
      "epoch": 13.0,
      "learning_rate": 2e-05,
      "loss": 1.1944,
      "step": 12480
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7096209003462108,
      "eval_loss": 1.397229790687561,
      "eval_runtime": 51.3927,
      "eval_samples_per_second": 147.959,
      "eval_steps_per_second": 0.934,
      "step": 12480
    },
    {
      "epoch": 14.0,
      "learning_rate": 2e-05,
      "loss": 1.1821,
      "step": 13440
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7115298351243047,
      "eval_loss": 1.386526107788086,
      "eval_runtime": 51.5062,
      "eval_samples_per_second": 147.633,
      "eval_steps_per_second": 0.932,
      "step": 13440
    },
    {
      "epoch": 15.0,
      "learning_rate": 2e-05,
      "loss": 1.1716,
      "step": 14400
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7124512834797834,
      "eval_loss": 1.383684515953064,
      "eval_runtime": 51.5334,
      "eval_samples_per_second": 147.555,
      "eval_steps_per_second": 0.931,
      "step": 14400
    },
    {
      "epoch": 16.0,
      "learning_rate": 2e-05,
      "loss": 1.1627,
      "step": 15360
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7130167108614363,
      "eval_loss": 1.3739854097366333,
      "eval_runtime": 51.1674,
      "eval_samples_per_second": 148.61,
      "eval_steps_per_second": 0.938,
      "step": 15360
    },
    {
      "epoch": 17.0,
      "learning_rate": 2e-05,
      "loss": 1.1535,
      "step": 16320
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7154258294693137,
      "eval_loss": 1.3581925630569458,
      "eval_runtime": 50.9905,
      "eval_samples_per_second": 149.126,
      "eval_steps_per_second": 0.941,
      "step": 16320
    },
    {
      "epoch": 18.0,
      "learning_rate": 2e-05,
      "loss": 1.1459,
      "step": 17280
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7157034442498541,
      "eval_loss": 1.3612279891967773,
      "eval_runtime": 51.0142,
      "eval_samples_per_second": 149.057,
      "eval_steps_per_second": 0.941,
      "step": 17280
    },
    {
      "epoch": 19.0,
      "learning_rate": 2e-05,
      "loss": 1.1381,
      "step": 18240
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7173944953215458,
      "eval_loss": 1.3571031093597412,
      "eval_runtime": 51.233,
      "eval_samples_per_second": 148.42,
      "eval_steps_per_second": 0.937,
      "step": 18240
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 1.1314,
      "step": 19200
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7164889608500034,
      "eval_loss": 1.3498369455337524,
      "eval_runtime": 51.475,
      "eval_samples_per_second": 147.722,
      "eval_steps_per_second": 0.932,
      "step": 19200
    },
    {
      "epoch": 21.0,
      "learning_rate": 2e-05,
      "loss": 1.1216,
      "step": 20160
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7190562640400042,
      "eval_loss": 1.3336502313613892,
      "eval_runtime": 51.2931,
      "eval_samples_per_second": 148.246,
      "eval_steps_per_second": 0.936,
      "step": 20160
    },
    {
      "epoch": 22.0,
      "learning_rate": 2e-05,
      "loss": 1.1187,
      "step": 21120
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7189180295993838,
      "eval_loss": 1.3376628160476685,
      "eval_runtime": 51.22,
      "eval_samples_per_second": 148.458,
      "eval_steps_per_second": 0.937,
      "step": 21120
    },
    {
      "epoch": 23.0,
      "learning_rate": 2e-05,
      "loss": 1.1128,
      "step": 22080
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7185502647071909,
      "eval_loss": 1.3377180099487305,
      "eval_runtime": 50.9033,
      "eval_samples_per_second": 149.381,
      "eval_steps_per_second": 0.943,
      "step": 22080
    },
    {
      "epoch": 24.0,
      "learning_rate": 2e-05,
      "loss": 1.1052,
      "step": 23040
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.7208811149658572,
      "eval_loss": 1.3223472833633423,
      "eval_runtime": 51.1878,
      "eval_samples_per_second": 148.551,
      "eval_steps_per_second": 0.938,
      "step": 23040
    },
    {
      "epoch": 25.0,
      "learning_rate": 2e-05,
      "loss": 1.0996,
      "step": 24000
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7216902223097578,
      "eval_loss": 1.3264613151550293,
      "eval_runtime": 50.8905,
      "eval_samples_per_second": 149.419,
      "eval_steps_per_second": 0.943,
      "step": 24000
    },
    {
      "epoch": 26.0,
      "learning_rate": 2e-05,
      "loss": 1.0961,
      "step": 24960
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.7211948506436695,
      "eval_loss": 1.320527195930481,
      "eval_runtime": 50.9638,
      "eval_samples_per_second": 149.204,
      "eval_steps_per_second": 0.942,
      "step": 24960
    },
    {
      "epoch": 27.0,
      "learning_rate": 2e-05,
      "loss": 1.0902,
      "step": 25920
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.7213838036019521,
      "eval_loss": 1.321337342262268,
      "eval_runtime": 52.1531,
      "eval_samples_per_second": 145.802,
      "eval_steps_per_second": 0.92,
      "step": 25920
    },
    {
      "epoch": 28.0,
      "learning_rate": 2e-05,
      "loss": 1.0835,
      "step": 26880
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.7239870388684853,
      "eval_loss": 1.3021934032440186,
      "eval_runtime": 51.6703,
      "eval_samples_per_second": 147.164,
      "eval_steps_per_second": 0.929,
      "step": 26880
    },
    {
      "epoch": 29.0,
      "learning_rate": 2e-05,
      "loss": 1.0796,
      "step": 27840
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.7225473777214267,
      "eval_loss": 1.3106894493103027,
      "eval_runtime": 51.4692,
      "eval_samples_per_second": 147.739,
      "eval_steps_per_second": 0.933,
      "step": 27840
    },
    {
      "epoch": 30.0,
      "learning_rate": 2e-05,
      "loss": 1.076,
      "step": 28800
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.7246294369637408,
      "eval_loss": 1.3005998134613037,
      "eval_runtime": 51.1849,
      "eval_samples_per_second": 148.56,
      "eval_steps_per_second": 0.938,
      "step": 28800
    },
    {
      "epoch": 31.0,
      "learning_rate": 2e-05,
      "loss": 1.0713,
      "step": 29760
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.723824419552894,
      "eval_loss": 1.3021259307861328,
      "eval_runtime": 51.1204,
      "eval_samples_per_second": 148.747,
      "eval_steps_per_second": 0.939,
      "step": 29760
    },
    {
      "epoch": 32.0,
      "learning_rate": 2e-05,
      "loss": 1.0679,
      "step": 30720
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.7239979316606835,
      "eval_loss": 1.3063867092132568,
      "eval_runtime": 50.892,
      "eval_samples_per_second": 149.415,
      "eval_steps_per_second": 0.943,
      "step": 30720
    },
    {
      "epoch": 33.0,
      "learning_rate": 2e-05,
      "loss": 1.0638,
      "step": 31680
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.7261307848236347,
      "eval_loss": 1.2864927053451538,
      "eval_runtime": 50.9275,
      "eval_samples_per_second": 149.31,
      "eval_steps_per_second": 0.943,
      "step": 31680
    },
    {
      "epoch": 34.0,
      "learning_rate": 2e-05,
      "loss": 1.058,
      "step": 32640
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.7236361200229268,
      "eval_loss": 1.3007187843322754,
      "eval_runtime": 51.9944,
      "eval_samples_per_second": 146.246,
      "eval_steps_per_second": 0.923,
      "step": 32640
    },
    {
      "epoch": 35.0,
      "learning_rate": 2e-05,
      "loss": 1.0548,
      "step": 33600
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.7257316118449666,
      "eval_loss": 1.2933671474456787,
      "eval_runtime": 51.2859,
      "eval_samples_per_second": 148.267,
      "eval_steps_per_second": 0.936,
      "step": 33600
    }
  ],
  "max_steps": 38400,
  "num_train_epochs": 40,
  "total_flos": 2041797358387200.0,
  "trial_name": null,
  "trial_params": null
}