| { | |
| "best_metric": 1.0, | |
| "best_model_checkpoint": "bit-50-Pharyngitis\\checkpoint-36", | |
| "epoch": 95.23809523809524, | |
| "global_step": 500, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.7027027027027027, | |
| "eval_loss": 0.6438681483268738, | |
| "eval_runtime": 0.5883, | |
| "eval_samples_per_second": 62.898, | |
| "eval_steps_per_second": 5.1, | |
| "step": 5 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1e-05, | |
| "loss": 0.6378, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_accuracy": 0.8918918918918919, | |
| "eval_loss": 0.4361162483692169, | |
| "eval_runtime": 0.3774, | |
| "eval_samples_per_second": 98.042, | |
| "eval_steps_per_second": 7.949, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 2.86, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.2754928469657898, | |
| "eval_runtime": 0.3737, | |
| "eval_samples_per_second": 99.02, | |
| "eval_steps_per_second": 8.029, | |
| "step": 15 | |
| }, | |
| { | |
| "epoch": 3.81, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4237, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.918918918918919, | |
| "eval_loss": 0.21373549103736877, | |
| "eval_runtime": 0.4065, | |
| "eval_samples_per_second": 91.026, | |
| "eval_steps_per_second": 7.381, | |
| "step": 21 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.1247631162405014, | |
| "eval_runtime": 0.3977, | |
| "eval_samples_per_second": 93.032, | |
| "eval_steps_per_second": 7.543, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 5.71, | |
| "learning_rate": 3e-05, | |
| "loss": 0.2592, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 5.9, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.08167269825935364, | |
| "eval_runtime": 0.3971, | |
| "eval_samples_per_second": 93.166, | |
| "eval_steps_per_second": 7.554, | |
| "step": 31 | |
| }, | |
| { | |
| "epoch": 6.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.06954346597194672, | |
| "eval_runtime": 0.3842, | |
| "eval_samples_per_second": 96.305, | |
| "eval_steps_per_second": 7.809, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 7.62, | |
| "learning_rate": 4e-05, | |
| "loss": 0.1775, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.918918918918919, | |
| "eval_loss": 0.23191875219345093, | |
| "eval_runtime": 0.3999, | |
| "eval_samples_per_second": 92.518, | |
| "eval_steps_per_second": 7.501, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 8.95, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.052055422216653824, | |
| "eval_runtime": 0.4011, | |
| "eval_samples_per_second": 92.257, | |
| "eval_steps_per_second": 7.48, | |
| "step": 47 | |
| }, | |
| { | |
| "epoch": 9.52, | |
| "learning_rate": 5e-05, | |
| "loss": 0.1805, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 9.9, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.047812674194574356, | |
| "eval_runtime": 0.3975, | |
| "eval_samples_per_second": 93.076, | |
| "eval_steps_per_second": 7.547, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 10.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.04274846613407135, | |
| "eval_runtime": 0.4052, | |
| "eval_samples_per_second": 91.313, | |
| "eval_steps_per_second": 7.404, | |
| "step": 57 | |
| }, | |
| { | |
| "epoch": 11.43, | |
| "learning_rate": 4.888888888888889e-05, | |
| "loss": 0.171, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.8918918918918919, | |
| "eval_loss": 0.1753544956445694, | |
| "eval_runtime": 0.4139, | |
| "eval_samples_per_second": 89.385, | |
| "eval_steps_per_second": 7.247, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 12.95, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.06655322760343552, | |
| "eval_runtime": 0.3887, | |
| "eval_samples_per_second": 95.191, | |
| "eval_steps_per_second": 7.718, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 4.7777777777777784e-05, | |
| "loss": 0.089, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 13.9, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.02494250051677227, | |
| "eval_runtime": 0.4123, | |
| "eval_samples_per_second": 89.751, | |
| "eval_steps_per_second": 7.277, | |
| "step": 73 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.019040122628211975, | |
| "eval_runtime": 0.4396, | |
| "eval_samples_per_second": 84.171, | |
| "eval_steps_per_second": 6.825, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 15.24, | |
| "learning_rate": 4.666666666666667e-05, | |
| "loss": 0.1093, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.06111358851194382, | |
| "eval_runtime": 0.4317, | |
| "eval_samples_per_second": 85.699, | |
| "eval_steps_per_second": 6.949, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 16.95, | |
| "eval_accuracy": 0.918918918918919, | |
| "eval_loss": 0.16683633625507355, | |
| "eval_runtime": 0.394, | |
| "eval_samples_per_second": 93.907, | |
| "eval_steps_per_second": 7.614, | |
| "step": 89 | |
| }, | |
| { | |
| "epoch": 17.14, | |
| "learning_rate": 4.555555555555556e-05, | |
| "loss": 0.1025, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 17.9, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.05101567134261131, | |
| "eval_runtime": 0.3847, | |
| "eval_samples_per_second": 96.177, | |
| "eval_steps_per_second": 7.798, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 18.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.010445931926369667, | |
| "eval_runtime": 0.3987, | |
| "eval_samples_per_second": 92.805, | |
| "eval_steps_per_second": 7.525, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 19.05, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.12, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8918918918918919, | |
| "eval_loss": 0.18418262898921967, | |
| "eval_runtime": 0.391, | |
| "eval_samples_per_second": 94.632, | |
| "eval_steps_per_second": 7.673, | |
| "step": 105 | |
| }, | |
| { | |
| "epoch": 20.95, | |
| "learning_rate": 4.3333333333333334e-05, | |
| "loss": 0.0996, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 20.95, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.011865493841469288, | |
| "eval_runtime": 0.3899, | |
| "eval_samples_per_second": 94.901, | |
| "eval_steps_per_second": 7.695, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 21.9, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.03581492602825165, | |
| "eval_runtime": 0.4047, | |
| "eval_samples_per_second": 91.415, | |
| "eval_steps_per_second": 7.412, | |
| "step": 115 | |
| }, | |
| { | |
| "epoch": 22.86, | |
| "learning_rate": 4.222222222222222e-05, | |
| "loss": 0.0933, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 22.86, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.11507910490036011, | |
| "eval_runtime": 0.4091, | |
| "eval_samples_per_second": 90.451, | |
| "eval_steps_per_second": 7.334, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.08254072815179825, | |
| "eval_runtime": 0.3864, | |
| "eval_samples_per_second": 95.759, | |
| "eval_steps_per_second": 7.764, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 24.76, | |
| "learning_rate": 4.111111111111111e-05, | |
| "loss": 0.1118, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 24.95, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.08668244630098343, | |
| "eval_runtime": 0.3898, | |
| "eval_samples_per_second": 94.932, | |
| "eval_steps_per_second": 7.697, | |
| "step": 131 | |
| }, | |
| { | |
| "epoch": 25.9, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.009897518903017044, | |
| "eval_runtime": 0.4036, | |
| "eval_samples_per_second": 91.68, | |
| "eval_steps_per_second": 7.434, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 26.67, | |
| "learning_rate": 4e-05, | |
| "loss": 0.0471, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 26.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.019395073875784874, | |
| "eval_runtime": 0.3954, | |
| "eval_samples_per_second": 93.584, | |
| "eval_steps_per_second": 7.588, | |
| "step": 141 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0031759734265506268, | |
| "eval_runtime": 0.3912, | |
| "eval_samples_per_second": 94.57, | |
| "eval_steps_per_second": 7.668, | |
| "step": 147 | |
| }, | |
| { | |
| "epoch": 28.57, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.0686, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 28.95, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0057038371451199055, | |
| "eval_runtime": 0.3906, | |
| "eval_samples_per_second": 94.728, | |
| "eval_steps_per_second": 7.681, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 29.9, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0014269119128584862, | |
| "eval_runtime": 0.3905, | |
| "eval_samples_per_second": 94.751, | |
| "eval_steps_per_second": 7.682, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 30.48, | |
| "learning_rate": 3.777777777777778e-05, | |
| "loss": 0.0692, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 30.86, | |
| "eval_accuracy": 0.918918918918919, | |
| "eval_loss": 0.34027108550071716, | |
| "eval_runtime": 0.4009, | |
| "eval_samples_per_second": 92.295, | |
| "eval_steps_per_second": 7.483, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.011479969136416912, | |
| "eval_runtime": 0.4005, | |
| "eval_samples_per_second": 92.395, | |
| "eval_steps_per_second": 7.491, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 32.38, | |
| "learning_rate": 3.6666666666666666e-05, | |
| "loss": 0.0912, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 32.95, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.0989471897482872, | |
| "eval_runtime": 0.4052, | |
| "eval_samples_per_second": 91.323, | |
| "eval_steps_per_second": 7.405, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 33.9, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.10641559958457947, | |
| "eval_runtime": 0.3889, | |
| "eval_samples_per_second": 95.131, | |
| "eval_steps_per_second": 7.713, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 34.29, | |
| "learning_rate": 3.555555555555556e-05, | |
| "loss": 0.0994, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 34.86, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.09466935694217682, | |
| "eval_runtime": 0.3912, | |
| "eval_samples_per_second": 94.569, | |
| "eval_steps_per_second": 7.668, | |
| "step": 183 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.10136424005031586, | |
| "eval_runtime": 0.4255, | |
| "eval_samples_per_second": 86.966, | |
| "eval_steps_per_second": 7.051, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 36.19, | |
| "learning_rate": 3.444444444444445e-05, | |
| "loss": 0.0561, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 36.95, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.0519096776843071, | |
| "eval_runtime": 0.4107, | |
| "eval_samples_per_second": 90.096, | |
| "eval_steps_per_second": 7.305, | |
| "step": 194 | |
| }, | |
| { | |
| "epoch": 37.9, | |
| "eval_accuracy": 0.918918918918919, | |
| "eval_loss": 0.16669504344463348, | |
| "eval_runtime": 0.4065, | |
| "eval_samples_per_second": 91.027, | |
| "eval_steps_per_second": 7.381, | |
| "step": 199 | |
| }, | |
| { | |
| "epoch": 38.1, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.0516, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 38.86, | |
| "eval_accuracy": 0.918918918918919, | |
| "eval_loss": 0.3604719936847687, | |
| "eval_runtime": 0.3971, | |
| "eval_samples_per_second": 93.183, | |
| "eval_steps_per_second": 7.555, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 3.222222222222223e-05, | |
| "loss": 0.0535, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.044684119522571564, | |
| "eval_runtime": 0.3775, | |
| "eval_samples_per_second": 98.002, | |
| "eval_steps_per_second": 7.946, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 40.95, | |
| "eval_accuracy": 0.918918918918919, | |
| "eval_loss": 0.1711174100637436, | |
| "eval_runtime": 0.3876, | |
| "eval_samples_per_second": 95.468, | |
| "eval_steps_per_second": 7.741, | |
| "step": 215 | |
| }, | |
| { | |
| "epoch": 41.9, | |
| "learning_rate": 3.111111111111111e-05, | |
| "loss": 0.0475, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 41.9, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.10387804359197617, | |
| "eval_runtime": 0.3953, | |
| "eval_samples_per_second": 93.597, | |
| "eval_steps_per_second": 7.589, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 42.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.029212407767772675, | |
| "eval_runtime": 0.3915, | |
| "eval_samples_per_second": 94.5, | |
| "eval_steps_per_second": 7.662, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 43.81, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0504, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.014002898707985878, | |
| "eval_runtime": 0.3889, | |
| "eval_samples_per_second": 95.145, | |
| "eval_steps_per_second": 7.714, | |
| "step": 231 | |
| }, | |
| { | |
| "epoch": 44.95, | |
| "eval_accuracy": 0.8918918918918919, | |
| "eval_loss": 0.27451202273368835, | |
| "eval_runtime": 0.4255, | |
| "eval_samples_per_second": 86.962, | |
| "eval_steps_per_second": 7.051, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 45.71, | |
| "learning_rate": 2.8888888888888888e-05, | |
| "loss": 0.0432, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 45.9, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.002892308635637164, | |
| "eval_runtime": 0.3957, | |
| "eval_samples_per_second": 93.507, | |
| "eval_steps_per_second": 7.582, | |
| "step": 241 | |
| }, | |
| { | |
| "epoch": 46.86, | |
| "eval_accuracy": 0.8648648648648649, | |
| "eval_loss": 0.4316161274909973, | |
| "eval_runtime": 0.4242, | |
| "eval_samples_per_second": 87.218, | |
| "eval_steps_per_second": 7.072, | |
| "step": 246 | |
| }, | |
| { | |
| "epoch": 47.62, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.0992, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.015067849308252335, | |
| "eval_runtime": 0.3902, | |
| "eval_samples_per_second": 94.825, | |
| "eval_steps_per_second": 7.688, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 48.95, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.00749516487121582, | |
| "eval_runtime": 0.3915, | |
| "eval_samples_per_second": 94.506, | |
| "eval_steps_per_second": 7.663, | |
| "step": 257 | |
| }, | |
| { | |
| "epoch": 49.52, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.1531, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 49.9, | |
| "eval_accuracy": 0.8648648648648649, | |
| "eval_loss": 0.3624305725097656, | |
| "eval_runtime": 0.4028, | |
| "eval_samples_per_second": 91.854, | |
| "eval_steps_per_second": 7.448, | |
| "step": 262 | |
| }, | |
| { | |
| "epoch": 50.86, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.036536574363708496, | |
| "eval_runtime": 0.3932, | |
| "eval_samples_per_second": 94.091, | |
| "eval_steps_per_second": 7.629, | |
| "step": 267 | |
| }, | |
| { | |
| "epoch": 51.43, | |
| "learning_rate": 2.5555555555555554e-05, | |
| "loss": 0.0622, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.20016101002693176, | |
| "eval_runtime": 0.4002, | |
| "eval_samples_per_second": 92.444, | |
| "eval_steps_per_second": 7.495, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 52.95, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.12544459104537964, | |
| "eval_runtime": 0.4119, | |
| "eval_samples_per_second": 89.837, | |
| "eval_steps_per_second": 7.284, | |
| "step": 278 | |
| }, | |
| { | |
| "epoch": 53.33, | |
| "learning_rate": 2.4444444444444445e-05, | |
| "loss": 0.0432, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 53.9, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.0909779816865921, | |
| "eval_runtime": 0.3955, | |
| "eval_samples_per_second": 93.545, | |
| "eval_steps_per_second": 7.585, | |
| "step": 283 | |
| }, | |
| { | |
| "epoch": 54.86, | |
| "eval_accuracy": 0.8918918918918919, | |
| "eval_loss": 0.3727685809135437, | |
| "eval_runtime": 0.3993, | |
| "eval_samples_per_second": 92.669, | |
| "eval_steps_per_second": 7.514, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 55.24, | |
| "learning_rate": 2.3333333333333336e-05, | |
| "loss": 0.0531, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.10307420045137405, | |
| "eval_runtime": 0.3958, | |
| "eval_samples_per_second": 93.478, | |
| "eval_steps_per_second": 7.579, | |
| "step": 294 | |
| }, | |
| { | |
| "epoch": 56.95, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.002981973346322775, | |
| "eval_runtime": 0.3986, | |
| "eval_samples_per_second": 92.827, | |
| "eval_steps_per_second": 7.526, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 57.14, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.0731, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 57.9, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.2001199871301651, | |
| "eval_runtime": 0.4153, | |
| "eval_samples_per_second": 89.082, | |
| "eval_steps_per_second": 7.223, | |
| "step": 304 | |
| }, | |
| { | |
| "epoch": 58.86, | |
| "eval_accuracy": 0.918918918918919, | |
| "eval_loss": 0.2389511615037918, | |
| "eval_runtime": 0.3911, | |
| "eval_samples_per_second": 94.615, | |
| "eval_steps_per_second": 7.672, | |
| "step": 309 | |
| }, | |
| { | |
| "epoch": 59.05, | |
| "learning_rate": 2.111111111111111e-05, | |
| "loss": 0.0529, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.13383528590202332, | |
| "eval_runtime": 0.3898, | |
| "eval_samples_per_second": 94.919, | |
| "eval_steps_per_second": 7.696, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 60.95, | |
| "learning_rate": 2e-05, | |
| "loss": 0.0203, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 60.95, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.22631436586380005, | |
| "eval_runtime": 0.3946, | |
| "eval_samples_per_second": 93.757, | |
| "eval_steps_per_second": 7.602, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 61.9, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.16640439629554749, | |
| "eval_runtime": 0.3813, | |
| "eval_samples_per_second": 97.045, | |
| "eval_steps_per_second": 7.868, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 62.86, | |
| "learning_rate": 1.888888888888889e-05, | |
| "loss": 0.0345, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 62.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.006216500885784626, | |
| "eval_runtime": 0.3909, | |
| "eval_samples_per_second": 94.661, | |
| "eval_steps_per_second": 7.675, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0056242975406348705, | |
| "eval_runtime": 0.4048, | |
| "eval_samples_per_second": 91.408, | |
| "eval_steps_per_second": 7.411, | |
| "step": 336 | |
| }, | |
| { | |
| "epoch": 64.76, | |
| "learning_rate": 1.777777777777778e-05, | |
| "loss": 0.0595, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 64.95, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.022064007818698883, | |
| "eval_runtime": 0.3988, | |
| "eval_samples_per_second": 92.779, | |
| "eval_steps_per_second": 7.523, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 65.9, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.01646520011126995, | |
| "eval_runtime": 0.4012, | |
| "eval_samples_per_second": 92.225, | |
| "eval_steps_per_second": 7.478, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 66.67, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0278, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 66.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.01749444752931595, | |
| "eval_runtime": 0.404, | |
| "eval_samples_per_second": 91.579, | |
| "eval_steps_per_second": 7.425, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.017633311450481415, | |
| "eval_runtime": 0.3933, | |
| "eval_samples_per_second": 94.074, | |
| "eval_steps_per_second": 7.628, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 68.57, | |
| "learning_rate": 1.5555555555555555e-05, | |
| "loss": 0.1035, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 68.95, | |
| "eval_accuracy": 0.918918918918919, | |
| "eval_loss": 0.15788349509239197, | |
| "eval_runtime": 0.3978, | |
| "eval_samples_per_second": 93.022, | |
| "eval_steps_per_second": 7.542, | |
| "step": 362 | |
| }, | |
| { | |
| "epoch": 69.9, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.06550092250108719, | |
| "eval_runtime": 0.3834, | |
| "eval_samples_per_second": 96.515, | |
| "eval_steps_per_second": 7.826, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 70.48, | |
| "learning_rate": 1.4444444444444444e-05, | |
| "loss": 0.0466, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 70.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.00981216412037611, | |
| "eval_runtime": 0.4023, | |
| "eval_samples_per_second": 91.96, | |
| "eval_steps_per_second": 7.456, | |
| "step": 372 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.0761876329779625, | |
| "eval_runtime": 0.3968, | |
| "eval_samples_per_second": 93.24, | |
| "eval_steps_per_second": 7.56, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 72.38, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.0719, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 72.95, | |
| "eval_accuracy": 0.9459459459459459, | |
| "eval_loss": 0.26816752552986145, | |
| "eval_runtime": 0.4037, | |
| "eval_samples_per_second": 91.659, | |
| "eval_steps_per_second": 7.432, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 73.9, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.03098950907588005, | |
| "eval_runtime": 0.3999, | |
| "eval_samples_per_second": 92.524, | |
| "eval_steps_per_second": 7.502, | |
| "step": 388 | |
| }, | |
| { | |
| "epoch": 74.29, | |
| "learning_rate": 1.2222222222222222e-05, | |
| "loss": 0.0144, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 74.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.0038737300783395767, | |
| "eval_runtime": 0.4143, | |
| "eval_samples_per_second": 89.311, | |
| "eval_steps_per_second": 7.241, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.055370986461639404, | |
| "eval_runtime": 0.387, | |
| "eval_samples_per_second": 95.603, | |
| "eval_steps_per_second": 7.752, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 76.19, | |
| "learning_rate": 1.1111111111111112e-05, | |
| "loss": 0.0613, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 76.95, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.05363788455724716, | |
| "eval_runtime": 0.3953, | |
| "eval_samples_per_second": 93.59, | |
| "eval_steps_per_second": 7.588, | |
| "step": 404 | |
| }, | |
| { | |
| "epoch": 77.9, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.03543579578399658, | |
| "eval_runtime": 0.4021, | |
| "eval_samples_per_second": 92.01, | |
| "eval_steps_per_second": 7.46, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 78.1, | |
| "learning_rate": 1e-05, | |
| "loss": 0.0307, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 78.86, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.02702740766108036, | |
| "eval_runtime": 0.4203, | |
| "eval_samples_per_second": 88.032, | |
| "eval_steps_per_second": 7.138, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "learning_rate": 8.88888888888889e-06, | |
| "loss": 0.0253, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.005909389816224575, | |
| "eval_runtime": 0.3856, | |
| "eval_samples_per_second": 95.942, | |
| "eval_steps_per_second": 7.779, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 80.95, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.01845603436231613, | |
| "eval_runtime": 0.3966, | |
| "eval_samples_per_second": 93.298, | |
| "eval_steps_per_second": 7.565, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 81.9, | |
| "learning_rate": 7.777777777777777e-06, | |
| "loss": 0.0311, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 81.9, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.02441835217177868, | |
| "eval_runtime": 0.4215, | |
| "eval_samples_per_second": 87.778, | |
| "eval_steps_per_second": 7.117, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 82.86, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.030215006321668625, | |
| "eval_runtime": 0.4009, | |
| "eval_samples_per_second": 92.283, | |
| "eval_steps_per_second": 7.482, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 83.81, | |
| "learning_rate": 6.666666666666667e-06, | |
| "loss": 0.0189, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.00978150311857462, | |
| "eval_runtime": 0.4039, | |
| "eval_samples_per_second": 91.607, | |
| "eval_steps_per_second": 7.428, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 84.95, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.004685988184064627, | |
| "eval_runtime": 0.3893, | |
| "eval_samples_per_second": 95.039, | |
| "eval_steps_per_second": 7.706, | |
| "step": 446 | |
| }, | |
| { | |
| "epoch": 85.71, | |
| "learning_rate": 5.555555555555556e-06, | |
| "loss": 0.0235, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 85.9, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.010029388591647148, | |
| "eval_runtime": 0.4017, | |
| "eval_samples_per_second": 92.114, | |
| "eval_steps_per_second": 7.469, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 86.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.01385235320776701, | |
| "eval_runtime": 0.409, | |
| "eval_samples_per_second": 90.473, | |
| "eval_steps_per_second": 7.336, | |
| "step": 456 | |
| }, | |
| { | |
| "epoch": 87.62, | |
| "learning_rate": 4.444444444444445e-06, | |
| "loss": 0.014, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.013605994172394276, | |
| "eval_runtime": 0.4002, | |
| "eval_samples_per_second": 92.454, | |
| "eval_steps_per_second": 7.496, | |
| "step": 462 | |
| }, | |
| { | |
| "epoch": 88.95, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.030411923304200172, | |
| "eval_runtime": 0.3929, | |
| "eval_samples_per_second": 94.163, | |
| "eval_steps_per_second": 7.635, | |
| "step": 467 | |
| }, | |
| { | |
| "epoch": 89.52, | |
| "learning_rate": 3.3333333333333333e-06, | |
| "loss": 0.0197, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 89.9, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.04400445148348808, | |
| "eval_runtime": 0.4094, | |
| "eval_samples_per_second": 90.379, | |
| "eval_steps_per_second": 7.328, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 90.86, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.059370577335357666, | |
| "eval_runtime": 0.4037, | |
| "eval_samples_per_second": 91.649, | |
| "eval_steps_per_second": 7.431, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 91.43, | |
| "learning_rate": 2.2222222222222225e-06, | |
| "loss": 0.0309, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.048442043364048004, | |
| "eval_runtime": 0.3924, | |
| "eval_samples_per_second": 94.293, | |
| "eval_steps_per_second": 7.645, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 92.95, | |
| "eval_accuracy": 0.972972972972973, | |
| "eval_loss": 0.031032495200634003, | |
| "eval_runtime": 0.3937, | |
| "eval_samples_per_second": 93.991, | |
| "eval_steps_per_second": 7.621, | |
| "step": 488 | |
| }, | |
| { | |
| "epoch": 93.33, | |
| "learning_rate": 1.1111111111111112e-06, | |
| "loss": 0.0197, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 93.9, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.019568899646401405, | |
| "eval_runtime": 0.4189, | |
| "eval_samples_per_second": 88.319, | |
| "eval_steps_per_second": 7.161, | |
| "step": 493 | |
| }, | |
| { | |
| "epoch": 94.86, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.014562019146978855, | |
| "eval_runtime": 0.3925, | |
| "eval_samples_per_second": 94.275, | |
| "eval_steps_per_second": 7.644, | |
| "step": 498 | |
| }, | |
| { | |
| "epoch": 95.24, | |
| "learning_rate": 0.0, | |
| "loss": 0.0106, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 95.24, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.014045949093997478, | |
| "eval_runtime": 0.394, | |
| "eval_samples_per_second": 93.898, | |
| "eval_steps_per_second": 7.613, | |
| "step": 500 | |
| } | |
| ], | |
| "max_steps": 500, | |
| "num_train_epochs": 100, | |
| "total_flos": 2.628508796024832e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |