amzn_yelp / test_outputs.jsonl
tarabfish's picture
Upload folder using huggingface_hub
97c34c5 verified
{"step": 100, "loss": 0.7828950881958008, "accuracy": 0.871875, "runtime": 2.8057, "samples_per_second": 114.053, "steps_per_second": 57.027}
{"step": 200, "loss": 0.6410558819770813, "accuracy": 0.88125, "runtime": 2.9454, "samples_per_second": 108.645, "steps_per_second": 54.323}
{"step": 300, "loss": 0.4724481701850891, "accuracy": 0.90625, "runtime": 2.6489, "samples_per_second": 120.804, "steps_per_second": 60.402}
{"step": 400, "loss": 0.46632274985313416, "accuracy": 0.88125, "runtime": 2.743, "samples_per_second": 116.66, "steps_per_second": 58.33}
{"step": 500, "loss": 0.336311399936676, "accuracy": 0.928125, "runtime": 2.7645, "samples_per_second": 115.753, "steps_per_second": 57.876}
{"step": 600, "loss": 0.5237163305282593, "accuracy": 0.875, "runtime": 2.9888, "samples_per_second": 107.065, "steps_per_second": 53.532}
{"step": 700, "loss": 0.612436056137085, "accuracy": 0.865625, "runtime": 3.0505, "samples_per_second": 104.901, "steps_per_second": 52.45}
{"step": 800, "loss": 0.47742635011672974, "accuracy": 0.9, "runtime": 2.8541, "samples_per_second": 112.12, "steps_per_second": 56.06}
{"step": 900, "loss": 0.4391939640045166, "accuracy": 0.925, "runtime": 2.7428, "samples_per_second": 116.667, "steps_per_second": 58.334}
{"step": 1000, "loss": 0.3529580235481262, "accuracy": 0.925, "runtime": 2.7396, "samples_per_second": 116.807, "steps_per_second": 58.404}
{"step": 1100, "loss": 0.32948821783065796, "accuracy": 0.925, "runtime": 3.0406, "samples_per_second": 105.242, "steps_per_second": 52.621}
{"step": 1200, "loss": 0.5077411532402039, "accuracy": 0.9, "runtime": 3.0926, "samples_per_second": 103.474, "steps_per_second": 51.737}
{"step": 1300, "loss": 0.4512642025947571, "accuracy": 0.9, "runtime": 2.7715, "samples_per_second": 115.459, "steps_per_second": 57.73}
{"step": 1400, "loss": 0.43066683411598206, "accuracy": 0.89375, "runtime": 2.8379, "samples_per_second": 112.76, "steps_per_second": 56.38}
{"step": 1500, "loss": 0.37902718782424927, "accuracy": 0.9125, "runtime": 2.8742, "samples_per_second": 111.336, "steps_per_second": 55.668}
{"step": 1600, "loss": 0.435788631439209, "accuracy": 0.8875, "runtime": 3.0691, "samples_per_second": 104.266, "steps_per_second": 52.133}
{"step": 1700, "loss": 0.6496487855911255, "accuracy": 0.878125, "runtime": 3.019, "samples_per_second": 105.995, "steps_per_second": 52.998}
{"step": 1800, "loss": 0.5356670618057251, "accuracy": 0.896875, "runtime": 2.7479, "samples_per_second": 116.452, "steps_per_second": 58.226}
{"step": 1900, "loss": 0.5577458143234253, "accuracy": 0.903125, "runtime": 2.7431, "samples_per_second": 116.657, "steps_per_second": 58.329}
{"step": 2000, "loss": 0.5010732412338257, "accuracy": 0.9, "runtime": 2.8418, "samples_per_second": 112.606, "steps_per_second": 56.303}
{"step": 2100, "loss": 0.5578997135162354, "accuracy": 0.871875, "runtime": 2.9909, "samples_per_second": 106.99, "steps_per_second": 53.495}
{"step": 2200, "loss": 0.49204739928245544, "accuracy": 0.9125, "runtime": 2.9421, "samples_per_second": 108.768, "steps_per_second": 54.384}
{"step": 2300, "loss": 0.42611804604530334, "accuracy": 0.921875, "runtime": 2.8842, "samples_per_second": 110.949, "steps_per_second": 55.475}
{"step": 2400, "loss": 0.5209008455276489, "accuracy": 0.903125, "runtime": 2.8447, "samples_per_second": 112.49, "steps_per_second": 56.245}
{"step": 2500, "loss": 0.558965802192688, "accuracy": 0.909375, "runtime": 3.0287, "samples_per_second": 105.655, "steps_per_second": 52.828}
{"step": 2600, "loss": 0.5337815284729004, "accuracy": 0.909375, "runtime": 2.8886, "samples_per_second": 110.781, "steps_per_second": 55.391}
{"step": 2700, "loss": 0.43373221158981323, "accuracy": 0.91875, "runtime": 2.927, "samples_per_second": 109.328, "steps_per_second": 54.664}
{"step": 2800, "loss": 0.562017560005188, "accuracy": 0.90625, "runtime": 3.1459, "samples_per_second": 101.72, "steps_per_second": 50.86}
{"step": 2900, "loss": 0.48360759019851685, "accuracy": 0.915625, "runtime": 2.7861, "samples_per_second": 114.855, "steps_per_second": 57.427}
{"step": 3000, "loss": 0.5819778442382812, "accuracy": 0.903125, "runtime": 2.7837, "samples_per_second": 114.956, "steps_per_second": 57.478}
{"step": 3100, "loss": 0.549329400062561, "accuracy": 0.90625, "runtime": 3.1117, "samples_per_second": 102.837, "steps_per_second": 51.418}
{"step": 3200, "loss": 0.5409368276596069, "accuracy": 0.909375, "runtime": 3.0275, "samples_per_second": 105.698, "steps_per_second": 52.849}
{"step": 3300, "loss": 0.5061095952987671, "accuracy": 0.91875, "runtime": 2.7533, "samples_per_second": 116.222, "steps_per_second": 58.111}
{"step": 3400, "loss": 0.5100991129875183, "accuracy": 0.915625, "runtime": 2.8582, "samples_per_second": 111.958, "steps_per_second": 55.979}
{"step": 3500, "loss": 0.48856163024902344, "accuracy": 0.91875, "runtime": 2.8545, "samples_per_second": 112.104, "steps_per_second": 56.052}
{"step": 3600, "loss": 0.5146762728691101, "accuracy": 0.91875, "runtime": 3.0905, "samples_per_second": 103.544, "steps_per_second": 51.772}
{"step": 3700, "loss": 0.5367563962936401, "accuracy": 0.915625, "runtime": 2.9095, "samples_per_second": 109.983, "steps_per_second": 54.992}
{"step": 3800, "loss": 0.4761125445365906, "accuracy": 0.909375, "runtime": 2.8026, "samples_per_second": 114.178, "steps_per_second": 57.089}
{"step": 3900, "loss": 0.4483864903450012, "accuracy": 0.90625, "runtime": 3.0492, "samples_per_second": 104.944, "steps_per_second": 52.472}
{"step": 4000, "loss": 0.35619640350341797, "accuracy": 0.928125, "runtime": 2.9753, "samples_per_second": 107.553, "steps_per_second": 53.777}
{"step": 4100, "loss": 0.38642579317092896, "accuracy": 0.921875, "runtime": 3.0163, "samples_per_second": 106.091, "steps_per_second": 53.046}
{"step": 4200, "loss": 0.5521766543388367, "accuracy": 0.90625, "runtime": 2.8851, "samples_per_second": 110.915, "steps_per_second": 55.457}
{"step": 4300, "loss": 0.4897506833076477, "accuracy": 0.890625, "runtime": 2.805, "samples_per_second": 114.082, "steps_per_second": 57.041}
{"step": 4400, "loss": 0.4268820881843567, "accuracy": 0.921875, "runtime": 3.0937, "samples_per_second": 103.437, "steps_per_second": 51.718}
{"step": 4500, "loss": 0.43541350960731506, "accuracy": 0.928125, "runtime": 2.9558, "samples_per_second": 108.262, "steps_per_second": 54.131}
{"step": 4600, "loss": 0.4590047299861908, "accuracy": 0.91875, "runtime": 3.1134, "samples_per_second": 102.782, "steps_per_second": 51.391}
{"step": 4700, "loss": 0.42555299401283264, "accuracy": 0.921875, "runtime": 2.7577, "samples_per_second": 116.038, "steps_per_second": 58.019}
{"step": 4800, "loss": 0.46792250871658325, "accuracy": 0.921875, "runtime": 3.199, "samples_per_second": 100.03, "steps_per_second": 50.015}
{"step": 4900, "loss": 0.40328049659729004, "accuracy": 0.934375, "runtime": 2.8034, "samples_per_second": 114.146, "steps_per_second": 57.073}
{"step": 5000, "loss": 0.4490671753883362, "accuracy": 0.91875, "runtime": 2.8849, "samples_per_second": 110.924, "steps_per_second": 55.462}
{"step": 5100, "loss": 0.41796278953552246, "accuracy": 0.925, "runtime": 3.1312, "samples_per_second": 102.196, "steps_per_second": 51.098}
{"step": 5200, "loss": 0.3970945477485657, "accuracy": 0.925, "runtime": 2.9082, "samples_per_second": 110.032, "steps_per_second": 55.016}
{"step": 5300, "loss": 0.3905588984489441, "accuracy": 0.94375, "runtime": 2.972, "samples_per_second": 107.671, "steps_per_second": 53.835}
{"step": 5400, "loss": 0.3661550283432007, "accuracy": 0.9375, "runtime": 2.7487, "samples_per_second": 116.419, "steps_per_second": 58.21}
{"step": 5500, "loss": 0.42674118280410767, "accuracy": 0.928125, "runtime": 2.824, "samples_per_second": 113.315, "steps_per_second": 56.658}
{"step": 5600, "loss": 0.47028741240501404, "accuracy": 0.93125, "runtime": 2.9991, "samples_per_second": 106.699, "steps_per_second": 53.35}
{"step": 5700, "loss": 0.48885369300842285, "accuracy": 0.925, "runtime": 3.0056, "samples_per_second": 106.467, "steps_per_second": 53.233}
{"step": 5800, "loss": 0.4400312006473541, "accuracy": 0.925, "runtime": 2.7602, "samples_per_second": 115.934, "steps_per_second": 57.967}
{"step": 5900, "loss": 0.4716396927833557, "accuracy": 0.925, "runtime": 2.8432, "samples_per_second": 112.548, "steps_per_second": 56.274}
{"step": 6000, "loss": 0.45153647661209106, "accuracy": 0.9125, "runtime": 2.8676, "samples_per_second": 111.59, "steps_per_second": 55.795}
{"step": 6100, "loss": 0.45402488112449646, "accuracy": 0.925, "runtime": 3.1616, "samples_per_second": 101.214, "steps_per_second": 50.607}
{"step": 6200, "loss": 0.5484554767608643, "accuracy": 0.909375, "runtime": 3.133, "samples_per_second": 102.137, "steps_per_second": 51.069}
{"step": 6300, "loss": 0.541451632976532, "accuracy": 0.915625, "runtime": 2.8683, "samples_per_second": 111.564, "steps_per_second": 55.782}
{"step": 6400, "loss": 0.619576096534729, "accuracy": 0.9, "runtime": 2.8756, "samples_per_second": 111.281, "steps_per_second": 55.641}
{"step": 6500, "loss": 0.4348299503326416, "accuracy": 0.93125, "runtime": 2.7745, "samples_per_second": 115.336, "steps_per_second": 57.668}
{"step": 6600, "loss": 0.4980931878089905, "accuracy": 0.921875, "runtime": 2.9938, "samples_per_second": 106.888, "steps_per_second": 53.444}
{"step": 6700, "loss": 0.4891650080680847, "accuracy": 0.921875, "runtime": 2.9254, "samples_per_second": 109.387, "steps_per_second": 54.694}
{"step": 6800, "loss": 0.5331800580024719, "accuracy": 0.925, "runtime": 2.8311, "samples_per_second": 113.03, "steps_per_second": 56.515}
{"step": 6900, "loss": 0.5215120315551758, "accuracy": 0.91875, "runtime": 2.7313, "samples_per_second": 117.159, "steps_per_second": 58.58}
{"step": 7000, "loss": 0.5357473492622375, "accuracy": 0.921875, "runtime": 2.872, "samples_per_second": 111.419, "steps_per_second": 55.71}
{"step": 7100, "loss": 0.5796685218811035, "accuracy": 0.91875, "runtime": 3.1783, "samples_per_second": 100.682, "steps_per_second": 50.341}
{"step": 7200, "loss": 0.4970870614051819, "accuracy": 0.921875, "runtime": 3.0019, "samples_per_second": 106.598, "steps_per_second": 53.299}
{"step": 7300, "loss": 0.46576887369155884, "accuracy": 0.925, "runtime": 2.83, "samples_per_second": 113.073, "steps_per_second": 56.536}
{"step": 7400, "loss": 0.46547237038612366, "accuracy": 0.91875, "runtime": 2.873, "samples_per_second": 111.381, "steps_per_second": 55.691}
{"step": 7500, "loss": 0.4488906264305115, "accuracy": 0.93125, "runtime": 2.7426, "samples_per_second": 116.676, "steps_per_second": 58.338}
{"step": 7600, "loss": 0.4806918203830719, "accuracy": 0.93125, "runtime": 2.884, "samples_per_second": 110.955, "steps_per_second": 55.478}
{"step": 7700, "loss": 0.5847142934799194, "accuracy": 0.90625, "runtime": 2.8473, "samples_per_second": 112.387, "steps_per_second": 56.193}
{"step": 7800, "loss": 0.5970549583435059, "accuracy": 0.909375, "runtime": 3.0567, "samples_per_second": 104.688, "steps_per_second": 52.344}
{"step": 7900, "loss": 0.4819498658180237, "accuracy": 0.925, "runtime": 2.9925, "samples_per_second": 106.933, "steps_per_second": 53.467}
{"step": 8000, "loss": 0.4904124140739441, "accuracy": 0.928125, "runtime": 3.0803, "samples_per_second": 103.886, "steps_per_second": 51.943}
{"step": 8100, "loss": 0.4956900179386139, "accuracy": 0.928125, "runtime": 2.7659, "samples_per_second": 115.694, "steps_per_second": 57.847}
{"step": 8200, "loss": 0.5745713114738464, "accuracy": 0.90625, "runtime": 2.9216, "samples_per_second": 109.529, "steps_per_second": 54.764}
{"step": 8300, "loss": 0.5323331952095032, "accuracy": 0.915625, "runtime": 3.1843, "samples_per_second": 100.494, "steps_per_second": 50.247}
{"step": 8400, "loss": 0.5545214414596558, "accuracy": 0.915625, "runtime": 3.0076, "samples_per_second": 106.398, "steps_per_second": 53.199}
{"step": 8500, "loss": 0.43759164214134216, "accuracy": 0.9375, "runtime": 3.0869, "samples_per_second": 103.665, "steps_per_second": 51.832}
{"step": 8600, "loss": 0.43747997283935547, "accuracy": 0.9375, "runtime": 2.8407, "samples_per_second": 112.65, "steps_per_second": 56.325}
{"step": 8700, "loss": 0.43234896659851074, "accuracy": 0.9375, "runtime": 2.8979, "samples_per_second": 110.425, "steps_per_second": 55.213}
{"step": 8800, "loss": 0.4293266832828522, "accuracy": 0.934375, "runtime": 3.1221, "samples_per_second": 102.496, "steps_per_second": 51.248}
{"step": 8900, "loss": 0.4287172853946686, "accuracy": 0.9375, "runtime": 2.9021, "samples_per_second": 110.265, "steps_per_second": 55.132}
{"step": 9000, "loss": 0.43642768263816833, "accuracy": 0.9375, "runtime": 2.9997, "samples_per_second": 106.679, "steps_per_second": 53.339}
{"step": 9100, "loss": 0.4443955421447754, "accuracy": 0.934375, "runtime": 2.7499, "samples_per_second": 116.37, "steps_per_second": 58.185}
{"step": 9200, "loss": 0.43916934728622437, "accuracy": 0.9375, "runtime": 2.8399, "samples_per_second": 112.68, "steps_per_second": 56.34}
{"step": 9300, "loss": 0.44628801941871643, "accuracy": 0.934375, "runtime": 2.7737, "samples_per_second": 115.367, "steps_per_second": 57.684}
{"step": 9400, "loss": 0.4474868178367615, "accuracy": 0.9375, "runtime": 2.8338, "samples_per_second": 112.924, "steps_per_second": 56.462}
{"step": 9500, "loss": 0.448885440826416, "accuracy": 0.9375, "runtime": 3.0868, "samples_per_second": 103.668, "steps_per_second": 51.834}
{"step": 9600, "loss": 0.44648313522338867, "accuracy": 0.9375, "runtime": 3.1112, "samples_per_second": 102.855, "steps_per_second": 51.427}