mixed_yelp_amzn / test_outputs.jsonl
tarabfish's picture
Upload folder using huggingface_hub
3d8d293 verified
{"step": 100, "loss": 0.5058485269546509, "accuracy": 0.875, "runtime": 7.3658, "samples_per_second": 86.888, "steps_per_second": 43.444}
{"step": 200, "loss": 0.8674852252006531, "accuracy": 0.8046875, "runtime": 7.2487, "samples_per_second": 88.292, "steps_per_second": 44.146}
{"step": 300, "loss": 0.5179504752159119, "accuracy": 0.9, "runtime": 7.3532, "samples_per_second": 87.037, "steps_per_second": 43.518}
{"step": 400, "loss": 0.5001088380813599, "accuracy": 0.884375, "runtime": 7.4073, "samples_per_second": 86.401, "steps_per_second": 43.2}
{"step": 500, "loss": 0.5095179080963135, "accuracy": 0.8703125, "runtime": 7.7108, "samples_per_second": 83.0, "steps_per_second": 41.5}
{"step": 600, "loss": 0.41660434007644653, "accuracy": 0.9171875, "runtime": 7.5175, "samples_per_second": 85.135, "steps_per_second": 42.567}
{"step": 700, "loss": 0.44803065061569214, "accuracy": 0.9171875, "runtime": 7.7797, "samples_per_second": 82.265, "steps_per_second": 41.133}
{"step": 800, "loss": 0.37059149146080017, "accuracy": 0.9046875, "runtime": 8.2971, "samples_per_second": 77.136, "steps_per_second": 38.568}
{"step": 900, "loss": 0.39890074729919434, "accuracy": 0.9140625, "runtime": 8.5249, "samples_per_second": 75.074, "steps_per_second": 37.537}
{"step": 1000, "loss": 0.3408961892127991, "accuracy": 0.9265625, "runtime": 8.7034, "samples_per_second": 73.534, "steps_per_second": 36.767}
{"step": 1100, "loss": 0.4134843945503235, "accuracy": 0.9, "runtime": 8.7978, "samples_per_second": 72.746, "steps_per_second": 36.373}
{"step": 1200, "loss": 0.5976473093032837, "accuracy": 0.8921875, "runtime": 8.9215, "samples_per_second": 71.737, "steps_per_second": 35.868}
{"step": 1300, "loss": 0.3381514549255371, "accuracy": 0.9328125, "runtime": 8.6965, "samples_per_second": 73.593, "steps_per_second": 36.797}
{"step": 1400, "loss": 0.3733733296394348, "accuracy": 0.909375, "runtime": 9.1312, "samples_per_second": 70.089, "steps_per_second": 35.045}
{"step": 1500, "loss": 0.3511451184749603, "accuracy": 0.9078125, "runtime": 8.6916, "samples_per_second": 73.635, "steps_per_second": 36.817}
{"step": 1600, "loss": 0.5320402383804321, "accuracy": 0.9078125, "runtime": 8.7704, "samples_per_second": 72.973, "steps_per_second": 36.486}
{"step": 1700, "loss": 0.382881760597229, "accuracy": 0.9140625, "runtime": 9.2164, "samples_per_second": 69.441, "steps_per_second": 34.721}
{"step": 1800, "loss": 0.4133937358856201, "accuracy": 0.9296875, "runtime": 9.2041, "samples_per_second": 69.534, "steps_per_second": 34.767}
{"step": 1900, "loss": 0.3688477873802185, "accuracy": 0.9171875, "runtime": 9.3781, "samples_per_second": 68.244, "steps_per_second": 34.122}
{"step": 2000, "loss": 0.49557924270629883, "accuracy": 0.8890625, "runtime": 9.141, "samples_per_second": 70.014, "steps_per_second": 35.007}
{"step": 2100, "loss": 0.40696582198143005, "accuracy": 0.9125, "runtime": 9.1886, "samples_per_second": 69.652, "steps_per_second": 34.826}
{"step": 2200, "loss": 0.35317379236221313, "accuracy": 0.9328125, "runtime": 8.7465, "samples_per_second": 73.172, "steps_per_second": 36.586}
{"step": 2300, "loss": 0.34157508611679077, "accuracy": 0.9265625, "runtime": 8.8511, "samples_per_second": 72.308, "steps_per_second": 36.154}
{"step": 2400, "loss": 0.37640485167503357, "accuracy": 0.9359375, "runtime": 8.8001, "samples_per_second": 72.726, "steps_per_second": 36.363}
{"step": 2500, "loss": 0.46027231216430664, "accuracy": 0.9015625, "runtime": 8.8168, "samples_per_second": 72.588, "steps_per_second": 36.294}
{"step": 2600, "loss": 0.35136550664901733, "accuracy": 0.9390625, "runtime": 8.9432, "samples_per_second": 71.563, "steps_per_second": 35.782}
{"step": 2700, "loss": 0.3576732575893402, "accuracy": 0.9265625, "runtime": 8.7379, "samples_per_second": 73.244, "steps_per_second": 36.622}
{"step": 2800, "loss": 0.498345285654068, "accuracy": 0.925, "runtime": 8.7503, "samples_per_second": 73.14, "steps_per_second": 36.57}
{"step": 2900, "loss": 0.34418749809265137, "accuracy": 0.928125, "runtime": 9.0772, "samples_per_second": 70.506, "steps_per_second": 35.253}
{"step": 3000, "loss": 0.39781031012535095, "accuracy": 0.9140625, "runtime": 8.9363, "samples_per_second": 71.618, "steps_per_second": 35.809}
{"step": 3100, "loss": 0.4678925573825836, "accuracy": 0.7671875, "runtime": 8.8414, "samples_per_second": 72.387, "steps_per_second": 36.193}
{"step": 3200, "loss": 0.2849738895893097, "accuracy": 0.921875, "runtime": 8.8288, "samples_per_second": 72.49, "steps_per_second": 36.245}
{"step": 3300, "loss": 0.31825023889541626, "accuracy": 0.9375, "runtime": 8.8923, "samples_per_second": 71.972, "steps_per_second": 35.986}
{"step": 3400, "loss": 0.3107290267944336, "accuracy": 0.9296875, "runtime": 8.8157, "samples_per_second": 72.598, "steps_per_second": 36.299}
{"step": 3500, "loss": 0.34294435381889343, "accuracy": 0.93125, "runtime": 9.4176, "samples_per_second": 67.958, "steps_per_second": 33.979}
{"step": 3600, "loss": 0.5562206506729126, "accuracy": 0.909375, "runtime": 9.374, "samples_per_second": 68.274, "steps_per_second": 34.137}
{"step": 3700, "loss": 0.41777855157852173, "accuracy": 0.9125, "runtime": 9.0592, "samples_per_second": 70.647, "steps_per_second": 35.323}
{"step": 3800, "loss": 0.33964020013809204, "accuracy": 0.93125, "runtime": 8.8936, "samples_per_second": 71.962, "steps_per_second": 35.981}
{"step": 3900, "loss": 0.351996511220932, "accuracy": 0.9296875, "runtime": 8.8405, "samples_per_second": 72.394, "steps_per_second": 36.197}
{"step": 4000, "loss": 0.42133086919784546, "accuracy": 0.915625, "runtime": 8.8251, "samples_per_second": 72.52, "steps_per_second": 36.26}
{"step": 4100, "loss": 0.34062179923057556, "accuracy": 0.9234375, "runtime": 9.2944, "samples_per_second": 68.858, "steps_per_second": 34.429}
{"step": 4200, "loss": 0.46695131063461304, "accuracy": 0.9171875, "runtime": 9.3076, "samples_per_second": 68.761, "steps_per_second": 34.381}
{"step": 4300, "loss": 0.6883209943771362, "accuracy": 0.8765625, "runtime": 8.7495, "samples_per_second": 73.147, "steps_per_second": 36.574}
{"step": 4400, "loss": 0.4490025043487549, "accuracy": 0.909375, "runtime": 8.7873, "samples_per_second": 72.832, "steps_per_second": 36.416}
{"step": 4500, "loss": 0.46545910835266113, "accuracy": 0.9203125, "runtime": 9.2476, "samples_per_second": 69.207, "steps_per_second": 34.604}
{"step": 4600, "loss": 0.34073182940483093, "accuracy": 0.925, "runtime": 8.8293, "samples_per_second": 72.486, "steps_per_second": 36.243}
{"step": 4700, "loss": 0.3223710060119629, "accuracy": 0.9234375, "runtime": 8.7509, "samples_per_second": 73.135, "steps_per_second": 36.567}
{"step": 4800, "loss": 0.2733241021633148, "accuracy": 0.9234375, "runtime": 9.0145, "samples_per_second": 70.997, "steps_per_second": 35.499}
{"step": 4900, "loss": 0.4742125868797302, "accuracy": 0.9078125, "runtime": 9.683, "samples_per_second": 66.095, "steps_per_second": 33.047}
{"step": 5000, "loss": 0.4030113220214844, "accuracy": 0.9328125, "runtime": 8.8473, "samples_per_second": 72.338, "steps_per_second": 36.169}
{"step": 5100, "loss": 0.3181467652320862, "accuracy": 0.9421875, "runtime": 8.9064, "samples_per_second": 71.859, "steps_per_second": 35.929}
{"step": 5200, "loss": 0.4094577729701996, "accuracy": 0.9390625, "runtime": 8.7925, "samples_per_second": 72.789, "steps_per_second": 36.395}
{"step": 5300, "loss": 0.33523690700531006, "accuracy": 0.940625, "runtime": 8.9994, "samples_per_second": 71.116, "steps_per_second": 35.558}
{"step": 5400, "loss": 0.29747968912124634, "accuracy": 0.94375, "runtime": 8.7416, "samples_per_second": 73.214, "steps_per_second": 36.607}
{"step": 5500, "loss": 0.3509012758731842, "accuracy": 0.9359375, "runtime": 8.7571, "samples_per_second": 73.083, "steps_per_second": 36.542}
{"step": 5600, "loss": 0.38290369510650635, "accuracy": 0.9234375, "runtime": 8.8384, "samples_per_second": 72.412, "steps_per_second": 36.206}
{"step": 5700, "loss": 0.41320666670799255, "accuracy": 0.928125, "runtime": 8.9468, "samples_per_second": 71.534, "steps_per_second": 35.767}
{"step": 5800, "loss": 0.476837694644928, "accuracy": 0.9140625, "runtime": 9.3648, "samples_per_second": 68.341, "steps_per_second": 34.171}
{"step": 5900, "loss": 0.35115379095077515, "accuracy": 0.940625, "runtime": 9.3709, "samples_per_second": 68.297, "steps_per_second": 34.148}
{"step": 6000, "loss": 0.40529942512512207, "accuracy": 0.9359375, "runtime": 8.7962, "samples_per_second": 72.759, "steps_per_second": 36.379}
{"step": 6100, "loss": 0.43945884704589844, "accuracy": 0.9203125, "runtime": 9.0469, "samples_per_second": 70.743, "steps_per_second": 35.371}
{"step": 6200, "loss": 0.29970142245292664, "accuracy": 0.9484375, "runtime": 8.8135, "samples_per_second": 72.615, "steps_per_second": 36.308}
{"step": 6300, "loss": 0.3428644835948944, "accuracy": 0.9375, "runtime": 8.9315, "samples_per_second": 71.656, "steps_per_second": 35.828}
{"step": 6400, "loss": 0.3321893811225891, "accuracy": 0.940625, "runtime": 9.3484, "samples_per_second": 68.461, "steps_per_second": 34.231}
{"step": 6500, "loss": 0.2958565354347229, "accuracy": 0.9484375, "runtime": 9.4446, "samples_per_second": 67.763, "steps_per_second": 33.882}
{"step": 6600, "loss": 0.33706173300743103, "accuracy": 0.9390625, "runtime": 8.7744, "samples_per_second": 72.939, "steps_per_second": 36.47}
{"step": 6700, "loss": 0.38445186614990234, "accuracy": 0.9328125, "runtime": 8.8287, "samples_per_second": 72.491, "steps_per_second": 36.245}
{"step": 6800, "loss": 0.4723990559577942, "accuracy": 0.915625, "runtime": 8.9684, "samples_per_second": 71.362, "steps_per_second": 35.681}
{"step": 6900, "loss": 0.2975652813911438, "accuracy": 0.9359375, "runtime": 8.801, "samples_per_second": 72.719, "steps_per_second": 36.359}
{"step": 7000, "loss": 0.4020497798919678, "accuracy": 0.940625, "runtime": 8.9612, "samples_per_second": 71.419, "steps_per_second": 35.709}
{"step": 7100, "loss": 0.33299142122268677, "accuracy": 0.9453125, "runtime": 8.9573, "samples_per_second": 71.45, "steps_per_second": 35.725}
{"step": 7200, "loss": 0.4184952676296234, "accuracy": 0.9296875, "runtime": 9.3499, "samples_per_second": 68.45, "steps_per_second": 34.225}
{"step": 7300, "loss": 0.39886870980262756, "accuracy": 0.9421875, "runtime": 8.8198, "samples_per_second": 72.564, "steps_per_second": 36.282}
{"step": 7400, "loss": 0.36387819051742554, "accuracy": 0.9390625, "runtime": 8.8447, "samples_per_second": 72.36, "steps_per_second": 36.18}
{"step": 7500, "loss": 0.4295511245727539, "accuracy": 0.928125, "runtime": 8.7707, "samples_per_second": 72.97, "steps_per_second": 36.485}
{"step": 7600, "loss": 0.44685807824134827, "accuracy": 0.9328125, "runtime": 8.7667, "samples_per_second": 73.003, "steps_per_second": 36.502}
{"step": 7700, "loss": 0.417947381734848, "accuracy": 0.928125, "runtime": 9.3545, "samples_per_second": 68.417, "steps_per_second": 34.208}
{"step": 7800, "loss": 0.39682143926620483, "accuracy": 0.9296875, "runtime": 9.2707, "samples_per_second": 69.034, "steps_per_second": 34.517}
{"step": 7900, "loss": 0.4272189140319824, "accuracy": 0.921875, "runtime": 9.4531, "samples_per_second": 67.703, "steps_per_second": 33.851}
{"step": 8000, "loss": 0.383857786655426, "accuracy": 0.9421875, "runtime": 9.2884, "samples_per_second": 68.903, "steps_per_second": 34.451}
{"step": 8100, "loss": 0.42631444334983826, "accuracy": 0.9265625, "runtime": 8.8015, "samples_per_second": 72.715, "steps_per_second": 36.357}
{"step": 8200, "loss": 0.4681987166404724, "accuracy": 0.925, "runtime": 8.7624, "samples_per_second": 73.039, "steps_per_second": 36.52}
{"step": 8300, "loss": 0.33258500695228577, "accuracy": 0.9359375, "runtime": 9.666, "samples_per_second": 66.212, "steps_per_second": 33.106}
{"step": 8400, "loss": 0.32588452100753784, "accuracy": 0.9484375, "runtime": 8.7043, "samples_per_second": 73.527, "steps_per_second": 36.763}
{"step": 8500, "loss": 0.5321284532546997, "accuracy": 0.915625, "runtime": 8.8052, "samples_per_second": 72.684, "steps_per_second": 36.342}
{"step": 8600, "loss": 0.42181625962257385, "accuracy": 0.9234375, "runtime": 8.8565, "samples_per_second": 72.263, "steps_per_second": 36.132}
{"step": 8700, "loss": 0.4531926214694977, "accuracy": 0.9109375, "runtime": 8.7717, "samples_per_second": 72.962, "steps_per_second": 36.481}
{"step": 8800, "loss": 0.391295850276947, "accuracy": 0.9359375, "runtime": 8.9324, "samples_per_second": 71.649, "steps_per_second": 35.825}
{"step": 8900, "loss": 0.3494811952114105, "accuracy": 0.9390625, "runtime": 8.8079, "samples_per_second": 72.662, "steps_per_second": 36.331}
{"step": 9000, "loss": 0.32174891233444214, "accuracy": 0.9453125, "runtime": 8.8293, "samples_per_second": 72.486, "steps_per_second": 36.243}
{"step": 9100, "loss": 0.43682727217674255, "accuracy": 0.9265625, "runtime": 8.8193, "samples_per_second": 72.568, "steps_per_second": 36.284}
{"step": 9200, "loss": 0.32130131125450134, "accuracy": 0.934375, "runtime": 8.9488, "samples_per_second": 71.518, "steps_per_second": 35.759}
{"step": 9300, "loss": 0.39666613936424255, "accuracy": 0.9390625, "runtime": 8.8005, "samples_per_second": 72.723, "steps_per_second": 36.361}
{"step": 9400, "loss": 0.37379029393196106, "accuracy": 0.9375, "runtime": 8.928, "samples_per_second": 71.685, "steps_per_second": 35.842}
{"step": 9500, "loss": 0.31776610016822815, "accuracy": 0.9453125, "runtime": 9.2982, "samples_per_second": 68.831, "steps_per_second": 34.415}
{"step": 9600, "loss": 0.34211957454681396, "accuracy": 0.9421875, "runtime": 8.8674, "samples_per_second": 72.175, "steps_per_second": 36.087}
{"step": 9700, "loss": 0.3393772542476654, "accuracy": 0.946875, "runtime": 8.8471, "samples_per_second": 72.34, "steps_per_second": 36.17}
{"step": 9800, "loss": 0.4749112129211426, "accuracy": 0.934375, "runtime": 8.809, "samples_per_second": 72.653, "steps_per_second": 36.327}
{"step": 9900, "loss": 0.42921629548072815, "accuracy": 0.9375, "runtime": 8.8016, "samples_per_second": 72.714, "steps_per_second": 36.357}
{"step": 10000, "loss": 0.36212417483329773, "accuracy": 0.94375, "runtime": 8.8036, "samples_per_second": 72.697, "steps_per_second": 36.349}
{"step": 10100, "loss": 0.4087269902229309, "accuracy": 0.940625, "runtime": 8.8507, "samples_per_second": 72.31, "steps_per_second": 36.155}
{"step": 10200, "loss": 0.4213874936103821, "accuracy": 0.9390625, "runtime": 9.5794, "samples_per_second": 66.81, "steps_per_second": 33.405}
{"step": 10300, "loss": 0.4520658552646637, "accuracy": 0.9390625, "runtime": 9.2691, "samples_per_second": 69.047, "steps_per_second": 34.523}
{"step": 10400, "loss": 0.42120012640953064, "accuracy": 0.9421875, "runtime": 8.9361, "samples_per_second": 71.619, "steps_per_second": 35.81}
{"step": 10500, "loss": 0.5749198198318481, "accuracy": 0.93125, "runtime": 8.8168, "samples_per_second": 72.589, "steps_per_second": 36.294}
{"step": 10600, "loss": 0.4454590678215027, "accuracy": 0.9359375, "runtime": 8.7972, "samples_per_second": 72.75, "steps_per_second": 36.375}
{"step": 10700, "loss": 0.4158696234226227, "accuracy": 0.9359375, "runtime": 9.04, "samples_per_second": 70.797, "steps_per_second": 35.398}
{"step": 10800, "loss": 0.4291527271270752, "accuracy": 0.9359375, "runtime": 9.5333, "samples_per_second": 67.133, "steps_per_second": 33.566}
{"step": 10900, "loss": 0.4440538287162781, "accuracy": 0.9359375, "runtime": 9.3116, "samples_per_second": 68.732, "steps_per_second": 34.366}
{"step": 11000, "loss": 0.4737754762172699, "accuracy": 0.9375, "runtime": 9.2952, "samples_per_second": 68.853, "steps_per_second": 34.426}
{"step": 11100, "loss": 0.41139650344848633, "accuracy": 0.94375, "runtime": 9.3075, "samples_per_second": 68.762, "steps_per_second": 34.381}
{"step": 11200, "loss": 0.5059427618980408, "accuracy": 0.93125, "runtime": 8.8717, "samples_per_second": 72.139, "steps_per_second": 36.07}
{"step": 11300, "loss": 0.47976192831993103, "accuracy": 0.9359375, "runtime": 8.9822, "samples_per_second": 71.252, "steps_per_second": 35.626}
{"step": 11400, "loss": 0.43349307775497437, "accuracy": 0.940625, "runtime": 9.4393, "samples_per_second": 67.801, "steps_per_second": 33.901}
{"step": 11500, "loss": 0.44856634736061096, "accuracy": 0.940625, "runtime": 9.3224, "samples_per_second": 68.652, "steps_per_second": 34.326}
{"step": 11600, "loss": 0.42223042249679565, "accuracy": 0.9375, "runtime": 9.0449, "samples_per_second": 70.758, "steps_per_second": 35.379}
{"step": 11700, "loss": 0.3613812327384949, "accuracy": 0.946875, "runtime": 9.3528, "samples_per_second": 68.429, "steps_per_second": 34.214}
{"step": 11800, "loss": 0.339961439371109, "accuracy": 0.953125, "runtime": 8.9899, "samples_per_second": 71.191, "steps_per_second": 35.596}
{"step": 11900, "loss": 0.3741569519042969, "accuracy": 0.9515625, "runtime": 8.8222, "samples_per_second": 72.544, "steps_per_second": 36.272}
{"step": 12000, "loss": 0.4057230055332184, "accuracy": 0.946875, "runtime": 8.8161, "samples_per_second": 72.595, "steps_per_second": 36.297}
{"step": 12100, "loss": 0.435252845287323, "accuracy": 0.940625, "runtime": 8.7878, "samples_per_second": 72.828, "steps_per_second": 36.414}
{"step": 12200, "loss": 0.44900035858154297, "accuracy": 0.9390625, "runtime": 9.3177, "samples_per_second": 68.687, "steps_per_second": 34.343}
{"step": 12300, "loss": 0.4357311725616455, "accuracy": 0.9375, "runtime": 8.8732, "samples_per_second": 72.127, "steps_per_second": 36.064}
{"step": 12400, "loss": 0.34688371419906616, "accuracy": 0.946875, "runtime": 8.8455, "samples_per_second": 72.353, "steps_per_second": 36.177}
{"step": 12500, "loss": 0.4061656594276428, "accuracy": 0.940625, "runtime": 8.8162, "samples_per_second": 72.594, "steps_per_second": 36.297}
{"step": 12600, "loss": 0.3711402714252472, "accuracy": 0.9484375, "runtime": 9.3388, "samples_per_second": 68.531, "steps_per_second": 34.266}
{"step": 12700, "loss": 0.44172996282577515, "accuracy": 0.934375, "runtime": 9.3467, "samples_per_second": 68.473, "steps_per_second": 34.237}
{"step": 12800, "loss": 0.5394253730773926, "accuracy": 0.925, "runtime": 9.4978, "samples_per_second": 67.384, "steps_per_second": 33.692}
{"step": 12900, "loss": 0.5350068211555481, "accuracy": 0.928125, "runtime": 8.9103, "samples_per_second": 71.827, "steps_per_second": 35.914}
{"step": 13000, "loss": 0.42624321579933167, "accuracy": 0.94375, "runtime": 9.3868, "samples_per_second": 68.181, "steps_per_second": 34.09}
{"step": 13100, "loss": 0.3717637360095978, "accuracy": 0.940625, "runtime": 9.4135, "samples_per_second": 67.987, "steps_per_second": 33.994}
{"step": 13200, "loss": 0.3937380313873291, "accuracy": 0.94375, "runtime": 8.8416, "samples_per_second": 72.385, "steps_per_second": 36.192}
{"step": 13300, "loss": 0.5323719382286072, "accuracy": 0.9296875, "runtime": 9.1682, "samples_per_second": 69.806, "steps_per_second": 34.903}
{"step": 13400, "loss": 0.5047850012779236, "accuracy": 0.9265625, "runtime": 8.91, "samples_per_second": 71.829, "steps_per_second": 35.915}
{"step": 13500, "loss": 0.43117013573646545, "accuracy": 0.940625, "runtime": 9.3094, "samples_per_second": 68.748, "steps_per_second": 34.374}
{"step": 13600, "loss": 0.43228188157081604, "accuracy": 0.94375, "runtime": 9.3726, "samples_per_second": 68.284, "steps_per_second": 34.142}
{"step": 13700, "loss": 0.46011441946029663, "accuracy": 0.9421875, "runtime": 9.3195, "samples_per_second": 68.673, "steps_per_second": 34.336}
{"step": 13800, "loss": 0.45065832138061523, "accuracy": 0.94375, "runtime": 8.8493, "samples_per_second": 72.322, "steps_per_second": 36.161}
{"step": 13900, "loss": 0.4434789717197418, "accuracy": 0.940625, "runtime": 8.7416, "samples_per_second": 73.213, "steps_per_second": 36.607}
{"step": 14000, "loss": 0.4730510711669922, "accuracy": 0.934375, "runtime": 9.3415, "samples_per_second": 68.511, "steps_per_second": 34.256}
{"step": 14100, "loss": 0.43958553671836853, "accuracy": 0.9421875, "runtime": 9.3073, "samples_per_second": 68.763, "steps_per_second": 34.382}
{"step": 14200, "loss": 0.41356101632118225, "accuracy": 0.946875, "runtime": 8.882, "samples_per_second": 72.056, "steps_per_second": 36.028}
{"step": 14300, "loss": 0.4097806513309479, "accuracy": 0.9421875, "runtime": 9.494, "samples_per_second": 67.411, "steps_per_second": 33.706}
{"step": 14400, "loss": 0.4117391109466553, "accuracy": 0.9453125, "runtime": 9.3388, "samples_per_second": 68.531, "steps_per_second": 34.265}
{"step": 14500, "loss": 0.4265193045139313, "accuracy": 0.946875, "runtime": 8.8281, "samples_per_second": 72.496, "steps_per_second": 36.248}
{"step": 14600, "loss": 0.42982369661331177, "accuracy": 0.946875, "runtime": 9.3744, "samples_per_second": 68.271, "steps_per_second": 34.135}
{"step": 14700, "loss": 0.44320765137672424, "accuracy": 0.946875, "runtime": 9.292, "samples_per_second": 68.877, "steps_per_second": 34.438}
{"step": 14800, "loss": 0.4875010550022125, "accuracy": 0.940625, "runtime": 8.8134, "samples_per_second": 72.616, "steps_per_second": 36.308}
{"step": 14900, "loss": 0.49391189217567444, "accuracy": 0.94375, "runtime": 8.7377, "samples_per_second": 73.246, "steps_per_second": 36.623}
{"step": 15000, "loss": 0.5050318837165833, "accuracy": 0.9421875, "runtime": 9.6149, "samples_per_second": 66.563, "steps_per_second": 33.282}
{"step": 15100, "loss": 0.6182016134262085, "accuracy": 0.928125, "runtime": 8.9341, "samples_per_second": 71.636, "steps_per_second": 35.818}
{"step": 15200, "loss": 0.46913862228393555, "accuracy": 0.940625, "runtime": 8.8317, "samples_per_second": 72.466, "steps_per_second": 36.233}
{"step": 15300, "loss": 0.49129217863082886, "accuracy": 0.9375, "runtime": 9.0427, "samples_per_second": 70.775, "steps_per_second": 35.388}
{"step": 15400, "loss": 0.48806220293045044, "accuracy": 0.9375, "runtime": 8.8337, "samples_per_second": 72.45, "steps_per_second": 36.225}
{"step": 15500, "loss": 0.45531249046325684, "accuracy": 0.94375, "runtime": 8.8278, "samples_per_second": 72.498, "steps_per_second": 36.249}
{"step": 15600, "loss": 0.4610361158847809, "accuracy": 0.94375, "runtime": 8.8761, "samples_per_second": 72.104, "steps_per_second": 36.052}
{"step": 15700, "loss": 0.4821533262729645, "accuracy": 0.9375, "runtime": 8.8239, "samples_per_second": 72.53, "steps_per_second": 36.265}
{"step": 15800, "loss": 0.43205922842025757, "accuracy": 0.9453125, "runtime": 8.8708, "samples_per_second": 72.147, "steps_per_second": 36.073}
{"step": 15900, "loss": 0.4587193429470062, "accuracy": 0.9421875, "runtime": 9.3589, "samples_per_second": 68.384, "steps_per_second": 34.192}
{"step": 16000, "loss": 0.5391696691513062, "accuracy": 0.9328125, "runtime": 8.7424, "samples_per_second": 73.207, "steps_per_second": 36.603}
{"step": 16100, "loss": 0.500228762626648, "accuracy": 0.9375, "runtime": 9.3468, "samples_per_second": 68.472, "steps_per_second": 34.236}
{"step": 16200, "loss": 0.4977567791938782, "accuracy": 0.9359375, "runtime": 8.8578, "samples_per_second": 72.253, "steps_per_second": 36.127}
{"step": 16300, "loss": 0.5331653952598572, "accuracy": 0.934375, "runtime": 8.9473, "samples_per_second": 71.53, "steps_per_second": 35.765}
{"step": 16400, "loss": 0.5423542261123657, "accuracy": 0.934375, "runtime": 8.8383, "samples_per_second": 72.412, "steps_per_second": 36.206}
{"step": 16500, "loss": 0.5335698127746582, "accuracy": 0.934375, "runtime": 9.4218, "samples_per_second": 67.927, "steps_per_second": 33.964}
{"step": 16600, "loss": 0.5431577563285828, "accuracy": 0.934375, "runtime": 8.9932, "samples_per_second": 71.165, "steps_per_second": 35.582}
{"step": 16700, "loss": 0.5473291873931885, "accuracy": 0.934375, "runtime": 9.4283, "samples_per_second": 67.881, "steps_per_second": 33.941}
{"step": 16800, "loss": 0.5841249823570251, "accuracy": 0.934375, "runtime": 8.8333, "samples_per_second": 72.453, "steps_per_second": 36.226}
{"step": 16900, "loss": 0.5043960809707642, "accuracy": 0.9421875, "runtime": 8.8233, "samples_per_second": 72.536, "steps_per_second": 36.268}
{"step": 17000, "loss": 0.5455751419067383, "accuracy": 0.9359375, "runtime": 8.8281, "samples_per_second": 72.496, "steps_per_second": 36.248}
{"step": 17100, "loss": 0.5543818473815918, "accuracy": 0.9359375, "runtime": 9.0149, "samples_per_second": 70.994, "steps_per_second": 35.497}
{"step": 17200, "loss": 0.5138991475105286, "accuracy": 0.9421875, "runtime": 8.9481, "samples_per_second": 71.523, "steps_per_second": 35.762}
{"step": 17300, "loss": 0.5258229374885559, "accuracy": 0.940625, "runtime": 8.8132, "samples_per_second": 72.618, "steps_per_second": 36.309}
{"step": 17400, "loss": 0.5699753761291504, "accuracy": 0.934375, "runtime": 9.4072, "samples_per_second": 68.033, "steps_per_second": 34.017}
{"step": 17500, "loss": 0.5650780200958252, "accuracy": 0.934375, "runtime": 9.3234, "samples_per_second": 68.644, "steps_per_second": 34.322}
{"step": 17600, "loss": 0.5291606187820435, "accuracy": 0.9390625, "runtime": 8.7877, "samples_per_second": 72.829, "steps_per_second": 36.415}
{"step": 17700, "loss": 0.5055230855941772, "accuracy": 0.9375, "runtime": 9.4127, "samples_per_second": 67.993, "steps_per_second": 33.997}
{"step": 17800, "loss": 0.5072504878044128, "accuracy": 0.9375, "runtime": 8.8627, "samples_per_second": 72.213, "steps_per_second": 36.106}
{"step": 17900, "loss": 0.5129512548446655, "accuracy": 0.9375, "runtime": 9.3838, "samples_per_second": 68.202, "steps_per_second": 34.101}
{"step": 18000, "loss": 0.5193732380867004, "accuracy": 0.934375, "runtime": 9.8155, "samples_per_second": 65.203, "steps_per_second": 32.602}
{"step": 18100, "loss": 0.526825487613678, "accuracy": 0.9359375, "runtime": 9.4888, "samples_per_second": 67.448, "steps_per_second": 33.724}
{"step": 18200, "loss": 0.49438366293907166, "accuracy": 0.9375, "runtime": 9.0053, "samples_per_second": 71.069, "steps_per_second": 35.535}
{"step": 18300, "loss": 0.4908615052700043, "accuracy": 0.940625, "runtime": 8.9462, "samples_per_second": 71.539, "steps_per_second": 35.769}
{"step": 18400, "loss": 0.49182814359664917, "accuracy": 0.940625, "runtime": 8.8977, "samples_per_second": 71.929, "steps_per_second": 35.964}
{"step": 18500, "loss": 0.500982403755188, "accuracy": 0.940625, "runtime": 8.957, "samples_per_second": 71.452, "steps_per_second": 35.726}
{"step": 18600, "loss": 0.5381726026535034, "accuracy": 0.934375, "runtime": 9.335, "samples_per_second": 68.559, "steps_per_second": 34.28}
{"step": 18700, "loss": 0.5262613296508789, "accuracy": 0.9375, "runtime": 8.9495, "samples_per_second": 71.512, "steps_per_second": 35.756}
{"step": 18800, "loss": 0.48612627387046814, "accuracy": 0.9421875, "runtime": 9.169, "samples_per_second": 69.8, "steps_per_second": 34.9}
{"step": 18900, "loss": 0.4876532554626465, "accuracy": 0.9421875, "runtime": 8.7909, "samples_per_second": 72.803, "steps_per_second": 36.401}
{"step": 19000, "loss": 0.4894455373287201, "accuracy": 0.9421875, "runtime": 9.005, "samples_per_second": 71.071, "steps_per_second": 35.536}
{"step": 19100, "loss": 0.4873175024986267, "accuracy": 0.9421875, "runtime": 9.2831, "samples_per_second": 68.943, "steps_per_second": 34.471}
{"step": 19200, "loss": 0.4906328320503235, "accuracy": 0.9421875, "runtime": 9.3644, "samples_per_second": 68.344, "steps_per_second": 34.172}