| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.9988571428571429, | |
| "eval_steps": 10, | |
| "global_step": 437, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.022857142857142857, | |
| "grad_norm": 6.883157253265381, | |
| "learning_rate": 9.77116704805492e-05, | |
| "loss": 0.9709, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.022857142857142857, | |
| "eval_accuracy": 0.6398571133613586, | |
| "eval_loss": 0.8923419117927551, | |
| "eval_runtime": 252.6626, | |
| "eval_samples_per_second": 27.705, | |
| "eval_steps_per_second": 6.926, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.045714285714285714, | |
| "grad_norm": 4.793847560882568, | |
| "learning_rate": 9.542334096109841e-05, | |
| "loss": 0.9219, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.045714285714285714, | |
| "eval_accuracy": 0.7664285898208618, | |
| "eval_loss": 0.6903320550918579, | |
| "eval_runtime": 260.5483, | |
| "eval_samples_per_second": 26.866, | |
| "eval_steps_per_second": 6.717, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.06857142857142857, | |
| "grad_norm": 6.191551685333252, | |
| "learning_rate": 9.31350114416476e-05, | |
| "loss": 0.7112, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06857142857142857, | |
| "eval_accuracy": 0.7908571362495422, | |
| "eval_loss": 0.5838488936424255, | |
| "eval_runtime": 254.6091, | |
| "eval_samples_per_second": 27.493, | |
| "eval_steps_per_second": 6.873, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.09142857142857143, | |
| "grad_norm": 9.833272933959961, | |
| "learning_rate": 9.08466819221968e-05, | |
| "loss": 0.567, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.09142857142857143, | |
| "eval_accuracy": 0.8158571720123291, | |
| "eval_loss": 0.5405334830284119, | |
| "eval_runtime": 263.3184, | |
| "eval_samples_per_second": 26.584, | |
| "eval_steps_per_second": 6.646, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.11428571428571428, | |
| "grad_norm": 9.925666809082031, | |
| "learning_rate": 8.878718535469108e-05, | |
| "loss": 0.6184, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.11428571428571428, | |
| "eval_accuracy": 0.8581428527832031, | |
| "eval_loss": 0.41476812958717346, | |
| "eval_runtime": 259.1036, | |
| "eval_samples_per_second": 27.016, | |
| "eval_steps_per_second": 6.754, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.13714285714285715, | |
| "grad_norm": 3.723980665206909, | |
| "learning_rate": 8.649885583524028e-05, | |
| "loss": 0.5291, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.13714285714285715, | |
| "eval_accuracy": 0.8511428833007812, | |
| "eval_loss": 0.44439756870269775, | |
| "eval_runtime": 253.5826, | |
| "eval_samples_per_second": 27.604, | |
| "eval_steps_per_second": 6.901, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "grad_norm": 10.508088111877441, | |
| "learning_rate": 8.421052631578948e-05, | |
| "loss": 0.533, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 0.8271428346633911, | |
| "eval_loss": 0.4642958641052246, | |
| "eval_runtime": 260.9488, | |
| "eval_samples_per_second": 26.825, | |
| "eval_steps_per_second": 6.706, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.18285714285714286, | |
| "grad_norm": 7.824756622314453, | |
| "learning_rate": 8.192219679633868e-05, | |
| "loss": 0.4753, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.18285714285714286, | |
| "eval_accuracy": 0.876714289188385, | |
| "eval_loss": 0.35598087310791016, | |
| "eval_runtime": 262.7831, | |
| "eval_samples_per_second": 26.638, | |
| "eval_steps_per_second": 6.659, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.2057142857142857, | |
| "grad_norm": 5.332316875457764, | |
| "learning_rate": 7.963386727688788e-05, | |
| "loss": 0.4252, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.2057142857142857, | |
| "eval_accuracy": 0.8102856874465942, | |
| "eval_loss": 0.5888535380363464, | |
| "eval_runtime": 262.7552, | |
| "eval_samples_per_second": 26.641, | |
| "eval_steps_per_second": 6.66, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "grad_norm": 17.482688903808594, | |
| "learning_rate": 7.734553775743708e-05, | |
| "loss": 0.5007, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.22857142857142856, | |
| "eval_accuracy": 0.8662857413291931, | |
| "eval_loss": 0.38821107149124146, | |
| "eval_runtime": 261.4572, | |
| "eval_samples_per_second": 26.773, | |
| "eval_steps_per_second": 6.693, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.25142857142857145, | |
| "grad_norm": 8.691084861755371, | |
| "learning_rate": 7.505720823798627e-05, | |
| "loss": 0.5605, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.25142857142857145, | |
| "eval_accuracy": 0.8921428322792053, | |
| "eval_loss": 0.32210296392440796, | |
| "eval_runtime": 261.1514, | |
| "eval_samples_per_second": 26.804, | |
| "eval_steps_per_second": 6.701, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.2742857142857143, | |
| "grad_norm": 11.754142761230469, | |
| "learning_rate": 7.276887871853547e-05, | |
| "loss": 0.4875, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.2742857142857143, | |
| "eval_accuracy": 0.8558571338653564, | |
| "eval_loss": 0.36388570070266724, | |
| "eval_runtime": 265.2182, | |
| "eval_samples_per_second": 26.393, | |
| "eval_steps_per_second": 6.598, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.29714285714285715, | |
| "grad_norm": 7.222925662994385, | |
| "learning_rate": 7.048054919908466e-05, | |
| "loss": 0.4277, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.29714285714285715, | |
| "eval_accuracy": 0.8745714426040649, | |
| "eval_loss": 0.35708051919937134, | |
| "eval_runtime": 264.6016, | |
| "eval_samples_per_second": 26.455, | |
| "eval_steps_per_second": 6.614, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "grad_norm": 6.181695938110352, | |
| "learning_rate": 6.819221967963387e-05, | |
| "loss": 0.3415, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_accuracy": 0.8861428499221802, | |
| "eval_loss": 0.33818891644477844, | |
| "eval_runtime": 262.5039, | |
| "eval_samples_per_second": 26.666, | |
| "eval_steps_per_second": 6.667, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.34285714285714286, | |
| "grad_norm": 8.087543487548828, | |
| "learning_rate": 6.590389016018307e-05, | |
| "loss": 0.413, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.34285714285714286, | |
| "eval_accuracy": 0.9104285836219788, | |
| "eval_loss": 0.2596481442451477, | |
| "eval_runtime": 265.6837, | |
| "eval_samples_per_second": 26.347, | |
| "eval_steps_per_second": 6.587, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.3657142857142857, | |
| "grad_norm": 11.313796997070312, | |
| "learning_rate": 6.361556064073226e-05, | |
| "loss": 0.377, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.3657142857142857, | |
| "eval_accuracy": 0.8711428642272949, | |
| "eval_loss": 0.3518799841403961, | |
| "eval_runtime": 264.3798, | |
| "eval_samples_per_second": 26.477, | |
| "eval_steps_per_second": 6.619, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.38857142857142857, | |
| "grad_norm": 7.65640115737915, | |
| "learning_rate": 6.132723112128147e-05, | |
| "loss": 0.4219, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.38857142857142857, | |
| "eval_accuracy": 0.8947142958641052, | |
| "eval_loss": 0.2979215681552887, | |
| "eval_runtime": 262.8341, | |
| "eval_samples_per_second": 26.633, | |
| "eval_steps_per_second": 6.658, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.4114285714285714, | |
| "grad_norm": 6.2714433670043945, | |
| "learning_rate": 5.903890160183066e-05, | |
| "loss": 0.3317, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4114285714285714, | |
| "eval_accuracy": 0.9225714206695557, | |
| "eval_loss": 0.22266168892383575, | |
| "eval_runtime": 265.1248, | |
| "eval_samples_per_second": 26.403, | |
| "eval_steps_per_second": 6.601, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.4342857142857143, | |
| "grad_norm": 8.710111618041992, | |
| "learning_rate": 5.675057208237986e-05, | |
| "loss": 0.3131, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.4342857142857143, | |
| "eval_accuracy": 0.8692857027053833, | |
| "eval_loss": 0.3680011034011841, | |
| "eval_runtime": 260.0056, | |
| "eval_samples_per_second": 26.923, | |
| "eval_steps_per_second": 6.731, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "grad_norm": 4.041360378265381, | |
| "learning_rate": 5.446224256292907e-05, | |
| "loss": 0.3266, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.45714285714285713, | |
| "eval_accuracy": 0.9308571219444275, | |
| "eval_loss": 0.20981180667877197, | |
| "eval_runtime": 256.153, | |
| "eval_samples_per_second": 27.327, | |
| "eval_steps_per_second": 6.832, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "grad_norm": 10.932918548583984, | |
| "learning_rate": 5.217391304347826e-05, | |
| "loss": 0.3306, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.8824285864830017, | |
| "eval_loss": 0.3848917782306671, | |
| "eval_runtime": 253.9958, | |
| "eval_samples_per_second": 27.56, | |
| "eval_steps_per_second": 6.89, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.5028571428571429, | |
| "grad_norm": 9.440160751342773, | |
| "learning_rate": 4.9885583524027466e-05, | |
| "loss": 0.3037, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5028571428571429, | |
| "eval_accuracy": 0.9024285674095154, | |
| "eval_loss": 0.28518444299697876, | |
| "eval_runtime": 259.3612, | |
| "eval_samples_per_second": 26.989, | |
| "eval_steps_per_second": 6.747, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.5257142857142857, | |
| "grad_norm": 9.196854591369629, | |
| "learning_rate": 4.759725400457666e-05, | |
| "loss": 0.3086, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5257142857142857, | |
| "eval_accuracy": 0.9121428728103638, | |
| "eval_loss": 0.272481232881546, | |
| "eval_runtime": 254.9581, | |
| "eval_samples_per_second": 27.455, | |
| "eval_steps_per_second": 6.864, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.5485714285714286, | |
| "grad_norm": 6.610895156860352, | |
| "learning_rate": 4.530892448512586e-05, | |
| "loss": 0.2576, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5485714285714286, | |
| "eval_accuracy": 0.9355714321136475, | |
| "eval_loss": 0.18688350915908813, | |
| "eval_runtime": 255.2292, | |
| "eval_samples_per_second": 27.426, | |
| "eval_steps_per_second": 6.857, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "grad_norm": 15.24905014038086, | |
| "learning_rate": 4.302059496567506e-05, | |
| "loss": 0.2469, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5714285714285714, | |
| "eval_accuracy": 0.9242857098579407, | |
| "eval_loss": 0.2262311726808548, | |
| "eval_runtime": 254.9064, | |
| "eval_samples_per_second": 27.461, | |
| "eval_steps_per_second": 6.865, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.5942857142857143, | |
| "grad_norm": 9.8357515335083, | |
| "learning_rate": 4.073226544622426e-05, | |
| "loss": 0.2405, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.5942857142857143, | |
| "eval_accuracy": 0.9347142577171326, | |
| "eval_loss": 0.19631564617156982, | |
| "eval_runtime": 271.1966, | |
| "eval_samples_per_second": 25.812, | |
| "eval_steps_per_second": 6.453, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.6171428571428571, | |
| "grad_norm": 19.872060775756836, | |
| "learning_rate": 3.844393592677346e-05, | |
| "loss": 0.2802, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.6171428571428571, | |
| "eval_accuracy": 0.8804285526275635, | |
| "eval_loss": 0.3679888844490051, | |
| "eval_runtime": 256.0669, | |
| "eval_samples_per_second": 27.337, | |
| "eval_steps_per_second": 6.834, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "grad_norm": 3.6445915699005127, | |
| "learning_rate": 3.6155606407322653e-05, | |
| "loss": 0.2442, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.9292857050895691, | |
| "eval_loss": 0.20533673465251923, | |
| "eval_runtime": 255.7952, | |
| "eval_samples_per_second": 27.366, | |
| "eval_steps_per_second": 6.841, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.6628571428571428, | |
| "grad_norm": 8.114418983459473, | |
| "learning_rate": 3.3867276887871856e-05, | |
| "loss": 0.2302, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6628571428571428, | |
| "eval_accuracy": 0.8967142701148987, | |
| "eval_loss": 0.3355866074562073, | |
| "eval_runtime": 257.891, | |
| "eval_samples_per_second": 27.143, | |
| "eval_steps_per_second": 6.786, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.6857142857142857, | |
| "grad_norm": 5.993322372436523, | |
| "learning_rate": 3.157894736842105e-05, | |
| "loss": 0.2492, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.6857142857142857, | |
| "eval_accuracy": 0.9371428489685059, | |
| "eval_loss": 0.18795913457870483, | |
| "eval_runtime": 254.5882, | |
| "eval_samples_per_second": 27.495, | |
| "eval_steps_per_second": 6.874, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.7085714285714285, | |
| "grad_norm": 6.529418468475342, | |
| "learning_rate": 2.9290617848970254e-05, | |
| "loss": 0.2089, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7085714285714285, | |
| "eval_accuracy": 0.928857147693634, | |
| "eval_loss": 0.2076321393251419, | |
| "eval_runtime": 260.5938, | |
| "eval_samples_per_second": 26.862, | |
| "eval_steps_per_second": 6.715, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.7314285714285714, | |
| "grad_norm": 6.433741092681885, | |
| "learning_rate": 2.7002288329519453e-05, | |
| "loss": 0.2824, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7314285714285714, | |
| "eval_accuracy": 0.930142879486084, | |
| "eval_loss": 0.1999480277299881, | |
| "eval_runtime": 255.2396, | |
| "eval_samples_per_second": 27.425, | |
| "eval_steps_per_second": 6.856, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.7542857142857143, | |
| "grad_norm": 5.394837379455566, | |
| "learning_rate": 2.4713958810068652e-05, | |
| "loss": 0.2009, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7542857142857143, | |
| "eval_accuracy": 0.9521428346633911, | |
| "eval_loss": 0.14918017387390137, | |
| "eval_runtime": 258.1497, | |
| "eval_samples_per_second": 27.116, | |
| "eval_steps_per_second": 6.779, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.7771428571428571, | |
| "grad_norm": 5.843348503112793, | |
| "learning_rate": 2.242562929061785e-05, | |
| "loss": 0.2001, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.7771428571428571, | |
| "eval_accuracy": 0.951714277267456, | |
| "eval_loss": 0.14960123598575592, | |
| "eval_runtime": 253.1262, | |
| "eval_samples_per_second": 27.654, | |
| "eval_steps_per_second": 6.914, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "grad_norm": 7.778473377227783, | |
| "learning_rate": 2.0137299771167047e-05, | |
| "loss": 0.2298, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.9490000009536743, | |
| "eval_loss": 0.15794885158538818, | |
| "eval_runtime": 258.4154, | |
| "eval_samples_per_second": 27.088, | |
| "eval_steps_per_second": 6.772, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.8228571428571428, | |
| "grad_norm": 7.672749042510986, | |
| "learning_rate": 1.784897025171625e-05, | |
| "loss": 0.1802, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8228571428571428, | |
| "eval_accuracy": 0.9501428604125977, | |
| "eval_loss": 0.15056686103343964, | |
| "eval_runtime": 253.0586, | |
| "eval_samples_per_second": 27.662, | |
| "eval_steps_per_second": 6.915, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.8457142857142858, | |
| "grad_norm": 7.994875431060791, | |
| "learning_rate": 1.5560640732265445e-05, | |
| "loss": 0.1914, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8457142857142858, | |
| "eval_accuracy": 0.9311428666114807, | |
| "eval_loss": 0.20363783836364746, | |
| "eval_runtime": 261.3379, | |
| "eval_samples_per_second": 26.785, | |
| "eval_steps_per_second": 6.696, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.8685714285714285, | |
| "grad_norm": 3.988149404525757, | |
| "learning_rate": 1.3272311212814645e-05, | |
| "loss": 0.1897, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8685714285714285, | |
| "eval_accuracy": 0.9382857084274292, | |
| "eval_loss": 0.18375040590763092, | |
| "eval_runtime": 256.8539, | |
| "eval_samples_per_second": 27.253, | |
| "eval_steps_per_second": 6.813, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.8914285714285715, | |
| "grad_norm": 7.280108451843262, | |
| "learning_rate": 1.0983981693363844e-05, | |
| "loss": 0.1203, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.8914285714285715, | |
| "eval_accuracy": 0.9504285454750061, | |
| "eval_loss": 0.1459112912416458, | |
| "eval_runtime": 256.3941, | |
| "eval_samples_per_second": 27.302, | |
| "eval_steps_per_second": 6.825, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "grad_norm": 6.386229991912842, | |
| "learning_rate": 8.695652173913044e-06, | |
| "loss": 0.1372, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9142857142857143, | |
| "eval_accuracy": 0.9418571591377258, | |
| "eval_loss": 0.1748434156179428, | |
| "eval_runtime": 266.7645, | |
| "eval_samples_per_second": 26.24, | |
| "eval_steps_per_second": 6.56, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.9371428571428572, | |
| "grad_norm": 7.714508056640625, | |
| "learning_rate": 6.407322654462243e-06, | |
| "loss": 0.1942, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.9371428571428572, | |
| "eval_accuracy": 0.9405714273452759, | |
| "eval_loss": 0.18131674826145172, | |
| "eval_runtime": 266.6389, | |
| "eval_samples_per_second": 26.253, | |
| "eval_steps_per_second": 6.563, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "grad_norm": 4.493211269378662, | |
| "learning_rate": 4.118993135011442e-06, | |
| "loss": 0.1886, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 0.9509999752044678, | |
| "eval_loss": 0.15357272326946259, | |
| "eval_runtime": 273.0321, | |
| "eval_samples_per_second": 25.638, | |
| "eval_steps_per_second": 6.41, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.9828571428571429, | |
| "grad_norm": 4.66563606262207, | |
| "learning_rate": 1.8306636155606409e-06, | |
| "loss": 0.1872, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9828571428571429, | |
| "eval_accuracy": 0.952571451663971, | |
| "eval_loss": 0.1465713381767273, | |
| "eval_runtime": 266.7172, | |
| "eval_samples_per_second": 26.245, | |
| "eval_steps_per_second": 6.561, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.9988571428571429, | |
| "step": 437, | |
| "total_flos": 1.3128537437918904e+18, | |
| "train_loss": 0.3557066834218442, | |
| "train_runtime": 12202.3201, | |
| "train_samples_per_second": 2.295, | |
| "train_steps_per_second": 0.036 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 437, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 10, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1.3128537437918904e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |