| { | |
| "best_metric": 0.5172018348623854, | |
| "best_model_checkpoint": "outputs/soft_prompt/deberta-v2-xlarge/sst2/checkpoint-3400", | |
| "epoch": 3.0, | |
| "global_step": 25257, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 2.831559658050537, | |
| "eval_runtime": 12.4014, | |
| "eval_samples_per_second": 70.315, | |
| "eval_steps_per_second": 8.789, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7252020835876465, | |
| "eval_runtime": 12.5411, | |
| "eval_samples_per_second": 69.532, | |
| "eval_steps_per_second": 8.691, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.0294061052381518, | |
| "loss": 2.7746, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 6.318162441253662, | |
| "eval_runtime": 12.2524, | |
| "eval_samples_per_second": 71.17, | |
| "eval_steps_per_second": 8.896, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.9581830501556396, | |
| "eval_runtime": 13.2254, | |
| "eval_samples_per_second": 65.934, | |
| "eval_steps_per_second": 8.242, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.0288122104763036, | |
| "loss": 2.8668, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.97942715883255, | |
| "eval_runtime": 12.2204, | |
| "eval_samples_per_second": 71.356, | |
| "eval_steps_per_second": 8.92, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 9.121758460998535, | |
| "eval_runtime": 12.6821, | |
| "eval_samples_per_second": 68.758, | |
| "eval_steps_per_second": 8.595, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 2.1010525226593018, | |
| "eval_runtime": 13.3142, | |
| "eval_samples_per_second": 65.494, | |
| "eval_steps_per_second": 8.187, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "learning_rate": 0.0282183157144554, | |
| "loss": 3.0595, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 2.240478754043579, | |
| "eval_runtime": 11.7539, | |
| "eval_samples_per_second": 74.188, | |
| "eval_steps_per_second": 9.274, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 2.822434663772583, | |
| "eval_runtime": 11.937, | |
| "eval_samples_per_second": 73.05, | |
| "eval_steps_per_second": 9.131, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.027624420952607195, | |
| "loss": 2.7406, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 2.2581450939178467, | |
| "eval_runtime": 12.944, | |
| "eval_samples_per_second": 67.367, | |
| "eval_steps_per_second": 8.421, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 5.0679168701171875, | |
| "eval_runtime": 12.8993, | |
| "eval_samples_per_second": 67.6, | |
| "eval_steps_per_second": 8.45, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 4.079117774963379, | |
| "eval_runtime": 12.6438, | |
| "eval_samples_per_second": 68.966, | |
| "eval_steps_per_second": 8.621, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 0.027030526190758998, | |
| "loss": 2.341, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 6.498974800109863, | |
| "eval_runtime": 11.7875, | |
| "eval_samples_per_second": 73.977, | |
| "eval_steps_per_second": 9.247, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 2.2673330307006836, | |
| "eval_runtime": 12.4045, | |
| "eval_samples_per_second": 70.297, | |
| "eval_steps_per_second": 8.787, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.026436631428910798, | |
| "loss": 2.5017, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 2.775005340576172, | |
| "eval_runtime": 12.8935, | |
| "eval_samples_per_second": 67.631, | |
| "eval_steps_per_second": 8.454, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_accuracy": 0.5126146788990825, | |
| "eval_loss": 0.8905919790267944, | |
| "eval_runtime": 11.7168, | |
| "eval_samples_per_second": 74.423, | |
| "eval_steps_per_second": 9.303, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_accuracy": 0.5172018348623854, | |
| "eval_loss": 3.1630539894104004, | |
| "eval_runtime": 12.3677, | |
| "eval_samples_per_second": 70.506, | |
| "eval_steps_per_second": 8.813, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.025842736667062594, | |
| "loss": 2.587, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.8831442594528198, | |
| "eval_runtime": 12.7491, | |
| "eval_samples_per_second": 68.397, | |
| "eval_steps_per_second": 8.55, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.40650475025177, | |
| "eval_runtime": 11.8781, | |
| "eval_samples_per_second": 73.412, | |
| "eval_steps_per_second": 9.177, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.025248841905214398, | |
| "loss": 2.2272, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 5.7484235763549805, | |
| "eval_runtime": 12.2084, | |
| "eval_samples_per_second": 71.426, | |
| "eval_steps_per_second": 8.928, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 3.051417589187622, | |
| "eval_runtime": 13.0485, | |
| "eval_samples_per_second": 66.828, | |
| "eval_steps_per_second": 8.353, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.1735517978668213, | |
| "eval_runtime": 12.52, | |
| "eval_samples_per_second": 69.648, | |
| "eval_steps_per_second": 8.706, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.024654947143366194, | |
| "loss": 2.474, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 3.5813305377960205, | |
| "eval_runtime": 12.4532, | |
| "eval_samples_per_second": 70.022, | |
| "eval_steps_per_second": 8.753, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 3.0905096530914307, | |
| "eval_runtime": 12.1764, | |
| "eval_samples_per_second": 71.614, | |
| "eval_steps_per_second": 8.952, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "learning_rate": 0.024061052381517994, | |
| "loss": 2.6958, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.6613879203796387, | |
| "eval_runtime": 13.0709, | |
| "eval_samples_per_second": 66.713, | |
| "eval_steps_per_second": 8.339, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.9912046194076538, | |
| "eval_runtime": 11.0684, | |
| "eval_samples_per_second": 78.783, | |
| "eval_steps_per_second": 9.848, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 2.708740711212158, | |
| "eval_runtime": 12.3098, | |
| "eval_samples_per_second": 70.838, | |
| "eval_steps_per_second": 8.855, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.023467157619669794, | |
| "loss": 2.4571, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.9453651309013367, | |
| "eval_runtime": 12.5807, | |
| "eval_samples_per_second": 69.313, | |
| "eval_steps_per_second": 8.664, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.9460629820823669, | |
| "eval_runtime": 12.3033, | |
| "eval_samples_per_second": 70.875, | |
| "eval_steps_per_second": 8.859, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.022873262857821593, | |
| "loss": 2.4046, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_accuracy": 0.5034403669724771, | |
| "eval_loss": 0.7613060474395752, | |
| "eval_runtime": 12.922, | |
| "eval_samples_per_second": 67.482, | |
| "eval_steps_per_second": 8.435, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 3.473540782928467, | |
| "eval_runtime": 12.5608, | |
| "eval_samples_per_second": 69.423, | |
| "eval_steps_per_second": 8.678, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.8309389352798462, | |
| "eval_runtime": 12.3028, | |
| "eval_samples_per_second": 70.878, | |
| "eval_steps_per_second": 8.86, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.022279368095973393, | |
| "loss": 1.9778, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 2.345905303955078, | |
| "eval_runtime": 12.6993, | |
| "eval_samples_per_second": 68.665, | |
| "eval_steps_per_second": 8.583, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.992404818534851, | |
| "eval_runtime": 11.8953, | |
| "eval_samples_per_second": 73.306, | |
| "eval_steps_per_second": 9.163, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.02168547333412519, | |
| "loss": 1.9132, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 0.48394495412844035, | |
| "eval_loss": 0.7653124928474426, | |
| "eval_runtime": 12.5926, | |
| "eval_samples_per_second": 69.247, | |
| "eval_steps_per_second": 8.656, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.6025058031082153, | |
| "eval_runtime": 12.8694, | |
| "eval_samples_per_second": 67.757, | |
| "eval_steps_per_second": 8.47, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 2.3198583126068115, | |
| "eval_runtime": 12.2526, | |
| "eval_samples_per_second": 71.168, | |
| "eval_steps_per_second": 8.896, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.021091578572276993, | |
| "loss": 2.1041, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.9014425277709961, | |
| "eval_runtime": 12.1923, | |
| "eval_samples_per_second": 71.52, | |
| "eval_steps_per_second": 8.94, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.1414848566055298, | |
| "eval_runtime": 12.2534, | |
| "eval_samples_per_second": 71.164, | |
| "eval_steps_per_second": 8.896, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.020497683810428793, | |
| "loss": 2.2236, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.9049626588821411, | |
| "eval_runtime": 12.5976, | |
| "eval_samples_per_second": 69.219, | |
| "eval_steps_per_second": 8.652, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 2.53812575340271, | |
| "eval_runtime": 12.192, | |
| "eval_samples_per_second": 71.522, | |
| "eval_steps_per_second": 8.94, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 3.9090523719787598, | |
| "eval_runtime": 12.1436, | |
| "eval_samples_per_second": 71.807, | |
| "eval_steps_per_second": 8.976, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.01990378904858059, | |
| "loss": 1.9257, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_accuracy": 0.44380733944954126, | |
| "eval_loss": 1.3826260566711426, | |
| "eval_runtime": 12.394, | |
| "eval_samples_per_second": 70.357, | |
| "eval_steps_per_second": 8.795, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7106401920318604, | |
| "eval_runtime": 12.254, | |
| "eval_samples_per_second": 71.161, | |
| "eval_steps_per_second": 8.895, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "learning_rate": 0.019309894286732392, | |
| "loss": 1.9533, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.07, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.6487476825714111, | |
| "eval_runtime": 13.5392, | |
| "eval_samples_per_second": 64.406, | |
| "eval_steps_per_second": 8.051, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.4527719020843506, | |
| "eval_runtime": 12.2271, | |
| "eval_samples_per_second": 71.317, | |
| "eval_steps_per_second": 8.915, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.1699163913726807, | |
| "eval_runtime": 12.1388, | |
| "eval_samples_per_second": 71.836, | |
| "eval_steps_per_second": 8.979, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 0.01871599952488419, | |
| "loss": 1.7969, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.155882716178894, | |
| "eval_runtime": 12.1707, | |
| "eval_samples_per_second": 71.648, | |
| "eval_steps_per_second": 8.956, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.0576764345169067, | |
| "eval_runtime": 12.3024, | |
| "eval_samples_per_second": 70.88, | |
| "eval_steps_per_second": 8.86, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "learning_rate": 0.01812210476303599, | |
| "loss": 1.8048, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.1585994958877563, | |
| "eval_runtime": 12.8412, | |
| "eval_samples_per_second": 67.906, | |
| "eval_steps_per_second": 8.488, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7068227529525757, | |
| "eval_runtime": 12.4262, | |
| "eval_samples_per_second": 70.174, | |
| "eval_steps_per_second": 8.772, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7638933658599854, | |
| "eval_runtime": 12.0016, | |
| "eval_samples_per_second": 72.657, | |
| "eval_steps_per_second": 9.082, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 0.01752821000118779, | |
| "loss": 2.0729, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.188310146331787, | |
| "eval_runtime": 13.0443, | |
| "eval_samples_per_second": 66.849, | |
| "eval_steps_per_second": 8.356, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.6973594427108765, | |
| "eval_runtime": 11.5707, | |
| "eval_samples_per_second": 75.363, | |
| "eval_steps_per_second": 9.42, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.016934315239339588, | |
| "loss": 1.7558, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.8548436164855957, | |
| "eval_runtime": 13.0206, | |
| "eval_samples_per_second": 66.971, | |
| "eval_steps_per_second": 8.371, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7331790924072266, | |
| "eval_runtime": 12.107, | |
| "eval_samples_per_second": 72.024, | |
| "eval_steps_per_second": 9.003, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.7065454721450806, | |
| "eval_runtime": 12.0677, | |
| "eval_samples_per_second": 72.259, | |
| "eval_steps_per_second": 9.032, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 0.016340420477491388, | |
| "loss": 1.931, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 3.5782742500305176, | |
| "eval_runtime": 12.3183, | |
| "eval_samples_per_second": 70.789, | |
| "eval_steps_per_second": 8.849, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 2.2677643299102783, | |
| "eval_runtime": 11.6938, | |
| "eval_samples_per_second": 74.569, | |
| "eval_steps_per_second": 9.321, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.015746525715643188, | |
| "loss": 1.739, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.367167353630066, | |
| "eval_runtime": 12.6396, | |
| "eval_samples_per_second": 68.99, | |
| "eval_steps_per_second": 8.624, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.4737409353256226, | |
| "eval_runtime": 12.3657, | |
| "eval_samples_per_second": 70.517, | |
| "eval_steps_per_second": 8.815, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.8329254984855652, | |
| "eval_runtime": 12.1625, | |
| "eval_samples_per_second": 71.696, | |
| "eval_steps_per_second": 8.962, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "learning_rate": 0.015152630953794988, | |
| "loss": 1.655, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 2.237131118774414, | |
| "eval_runtime": 12.2197, | |
| "eval_samples_per_second": 71.36, | |
| "eval_steps_per_second": 8.92, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 3.846944570541382, | |
| "eval_runtime": 12.0155, | |
| "eval_samples_per_second": 72.573, | |
| "eval_steps_per_second": 9.072, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "learning_rate": 0.014558736191946788, | |
| "loss": 1.7284, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.54, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.403191328048706, | |
| "eval_runtime": 12.6645, | |
| "eval_samples_per_second": 68.854, | |
| "eval_steps_per_second": 8.607, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.1265949010849, | |
| "eval_runtime": 12.3046, | |
| "eval_samples_per_second": 70.868, | |
| "eval_steps_per_second": 8.859, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.9130467176437378, | |
| "eval_runtime": 12.0511, | |
| "eval_samples_per_second": 72.359, | |
| "eval_steps_per_second": 9.045, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 0.013964841430098586, | |
| "loss": 1.5742, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.832761824131012, | |
| "eval_runtime": 12.2374, | |
| "eval_samples_per_second": 71.257, | |
| "eval_steps_per_second": 8.907, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 1.64, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 3.8501062393188477, | |
| "eval_runtime": 12.573, | |
| "eval_samples_per_second": 69.355, | |
| "eval_steps_per_second": 8.669, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 0.013370946668250385, | |
| "loss": 1.7039, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.2914493083953857, | |
| "eval_runtime": 12.0326, | |
| "eval_samples_per_second": 72.47, | |
| "eval_steps_per_second": 9.059, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.136923909187317, | |
| "eval_runtime": 11.7256, | |
| "eval_samples_per_second": 74.367, | |
| "eval_steps_per_second": 9.296, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7574475407600403, | |
| "eval_runtime": 12.2584, | |
| "eval_samples_per_second": 71.135, | |
| "eval_steps_per_second": 8.892, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 0.012777051906402184, | |
| "loss": 1.4352, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7623356580734253, | |
| "eval_runtime": 12.706, | |
| "eval_samples_per_second": 68.629, | |
| "eval_steps_per_second": 8.579, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.6579828262329102, | |
| "eval_runtime": 12.16, | |
| "eval_samples_per_second": 71.71, | |
| "eval_steps_per_second": 8.964, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.012183157144553985, | |
| "loss": 1.6328, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_accuracy": 0.481651376146789, | |
| "eval_loss": 0.693511426448822, | |
| "eval_runtime": 12.7779, | |
| "eval_samples_per_second": 68.243, | |
| "eval_steps_per_second": 8.53, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.6989684700965881, | |
| "eval_runtime": 12.0393, | |
| "eval_samples_per_second": 72.43, | |
| "eval_steps_per_second": 9.054, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7303033471107483, | |
| "eval_runtime": 12.3696, | |
| "eval_samples_per_second": 70.496, | |
| "eval_steps_per_second": 8.812, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 0.011589262382705785, | |
| "loss": 1.4498, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.675624132156372, | |
| "eval_runtime": 12.6317, | |
| "eval_samples_per_second": 69.032, | |
| "eval_steps_per_second": 8.629, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.0083491802215576, | |
| "eval_runtime": 12.349, | |
| "eval_samples_per_second": 70.613, | |
| "eval_steps_per_second": 8.827, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 0.010995367620857583, | |
| "loss": 1.4022, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7790195345878601, | |
| "eval_runtime": 11.9659, | |
| "eval_samples_per_second": 72.874, | |
| "eval_steps_per_second": 9.109, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.296112060546875, | |
| "eval_runtime": 13.0796, | |
| "eval_samples_per_second": 66.669, | |
| "eval_steps_per_second": 8.334, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7208316326141357, | |
| "eval_runtime": 12.4652, | |
| "eval_samples_per_second": 69.955, | |
| "eval_steps_per_second": 8.744, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.010401472859009383, | |
| "loss": 1.4503, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.8011333346366882, | |
| "eval_runtime": 12.77, | |
| "eval_samples_per_second": 68.285, | |
| "eval_steps_per_second": 8.536, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.8194194436073303, | |
| "eval_runtime": 12.2506, | |
| "eval_samples_per_second": 71.18, | |
| "eval_steps_per_second": 8.898, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 0.009807578097161183, | |
| "loss": 1.3401, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.9209619760513306, | |
| "eval_runtime": 12.7245, | |
| "eval_samples_per_second": 68.529, | |
| "eval_steps_per_second": 8.566, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_accuracy": 0.4954128440366973, | |
| "eval_loss": 2.172947645187378, | |
| "eval_runtime": 13.3907, | |
| "eval_samples_per_second": 65.12, | |
| "eval_steps_per_second": 8.14, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 2.8217873573303223, | |
| "eval_runtime": 12.0414, | |
| "eval_samples_per_second": 72.417, | |
| "eval_steps_per_second": 9.052, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 0.009213683335312983, | |
| "loss": 1.515, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.8880350589752197, | |
| "eval_runtime": 12.7892, | |
| "eval_samples_per_second": 68.183, | |
| "eval_steps_per_second": 8.523, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 2.11, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.8059159517288208, | |
| "eval_runtime": 12.8563, | |
| "eval_samples_per_second": 67.827, | |
| "eval_steps_per_second": 8.478, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "learning_rate": 0.008619788573464782, | |
| "loss": 1.2519, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.14, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.1668144464492798, | |
| "eval_runtime": 12.0845, | |
| "eval_samples_per_second": 72.158, | |
| "eval_steps_per_second": 9.02, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.8440486192703247, | |
| "eval_runtime": 11.5453, | |
| "eval_samples_per_second": 75.529, | |
| "eval_steps_per_second": 9.441, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 2.19, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.482177972793579, | |
| "eval_runtime": 11.5533, | |
| "eval_samples_per_second": 75.476, | |
| "eval_steps_per_second": 9.435, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 0.008025893811616582, | |
| "loss": 1.2221, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 2.21, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.6977333426475525, | |
| "eval_runtime": 12.3902, | |
| "eval_samples_per_second": 70.378, | |
| "eval_steps_per_second": 8.797, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.3418121337890625, | |
| "eval_runtime": 12.4195, | |
| "eval_samples_per_second": 70.212, | |
| "eval_steps_per_second": 8.776, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 0.00743199904976838, | |
| "loss": 1.1201, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7915144562721252, | |
| "eval_runtime": 12.717, | |
| "eval_samples_per_second": 68.57, | |
| "eval_steps_per_second": 8.571, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.961918830871582, | |
| "eval_runtime": 13.173, | |
| "eval_samples_per_second": 66.196, | |
| "eval_steps_per_second": 8.275, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.220428228378296, | |
| "eval_runtime": 12.2958, | |
| "eval_samples_per_second": 70.918, | |
| "eval_steps_per_second": 8.865, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 0.006838104287920181, | |
| "loss": 1.0869, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.6541168689727783, | |
| "eval_runtime": 12.5954, | |
| "eval_samples_per_second": 69.232, | |
| "eval_steps_per_second": 8.654, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 2.7402684688568115, | |
| "eval_runtime": 12.7516, | |
| "eval_samples_per_second": 68.383, | |
| "eval_steps_per_second": 8.548, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "learning_rate": 0.00624420952607198, | |
| "loss": 1.0804, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.2037415504455566, | |
| "eval_runtime": 13.138, | |
| "eval_samples_per_second": 66.372, | |
| "eval_steps_per_second": 8.297, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7337152361869812, | |
| "eval_runtime": 11.6201, | |
| "eval_samples_per_second": 75.043, | |
| "eval_steps_per_second": 9.38, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.8853695392608643, | |
| "eval_runtime": 13.0127, | |
| "eval_samples_per_second": 67.012, | |
| "eval_steps_per_second": 8.376, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 0.005650314764223779, | |
| "loss": 1.0025, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7113233804702759, | |
| "eval_runtime": 12.2168, | |
| "eval_samples_per_second": 71.377, | |
| "eval_steps_per_second": 8.922, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.0583016872406006, | |
| "eval_runtime": 12.0017, | |
| "eval_samples_per_second": 72.657, | |
| "eval_steps_per_second": 9.082, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 0.005056420002375579, | |
| "loss": 0.9856, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7113476395606995, | |
| "eval_runtime": 12.9697, | |
| "eval_samples_per_second": 67.234, | |
| "eval_steps_per_second": 8.404, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7482680678367615, | |
| "eval_runtime": 12.7958, | |
| "eval_samples_per_second": 68.147, | |
| "eval_steps_per_second": 8.518, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.6966097950935364, | |
| "eval_runtime": 12.2964, | |
| "eval_samples_per_second": 70.915, | |
| "eval_steps_per_second": 8.864, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 0.004462525240527379, | |
| "loss": 1.0364, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.0607110261917114, | |
| "eval_runtime": 12.067, | |
| "eval_samples_per_second": 72.263, | |
| "eval_steps_per_second": 9.033, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.1381345987319946, | |
| "eval_runtime": 12.2151, | |
| "eval_samples_per_second": 71.387, | |
| "eval_steps_per_second": 8.923, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 0.003868630478679178, | |
| "loss": 0.9683, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7138826847076416, | |
| "eval_runtime": 12.7162, | |
| "eval_samples_per_second": 68.574, | |
| "eval_steps_per_second": 8.572, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.6361145973205566, | |
| "eval_runtime": 11.8698, | |
| "eval_samples_per_second": 73.464, | |
| "eval_steps_per_second": 9.183, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 1.1421782970428467, | |
| "eval_runtime": 11.321, | |
| "eval_samples_per_second": 77.025, | |
| "eval_steps_per_second": 9.628, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 0.0032747357168309774, | |
| "loss": 0.908, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 2.68, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7317955493927002, | |
| "eval_runtime": 12.2644, | |
| "eval_samples_per_second": 71.1, | |
| "eval_steps_per_second": 8.888, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.6962340474128723, | |
| "eval_runtime": 11.822, | |
| "eval_samples_per_second": 73.761, | |
| "eval_steps_per_second": 9.22, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 0.0026808409549827768, | |
| "loss": 0.8761, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.9568504691123962, | |
| "eval_runtime": 12.3037, | |
| "eval_samples_per_second": 70.873, | |
| "eval_steps_per_second": 8.859, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 2.76, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.9744265675544739, | |
| "eval_runtime": 11.9452, | |
| "eval_samples_per_second": 73.0, | |
| "eval_steps_per_second": 9.125, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 2.78, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 1.091610312461853, | |
| "eval_runtime": 12.8562, | |
| "eval_samples_per_second": 67.827, | |
| "eval_steps_per_second": 8.478, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "learning_rate": 0.0020869461931345766, | |
| "loss": 0.8209, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.8106526136398315, | |
| "eval_runtime": 11.7827, | |
| "eval_samples_per_second": 74.007, | |
| "eval_steps_per_second": 9.251, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7210954427719116, | |
| "eval_runtime": 11.3303, | |
| "eval_samples_per_second": 76.962, | |
| "eval_steps_per_second": 9.62, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 0.001493051431286376, | |
| "loss": 0.8008, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_accuracy": 0.49770642201834864, | |
| "eval_loss": 0.6930689811706543, | |
| "eval_runtime": 12.4553, | |
| "eval_samples_per_second": 70.01, | |
| "eval_steps_per_second": 8.751, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_accuracy": 0.4908256880733945, | |
| "eval_loss": 0.7184925675392151, | |
| "eval_runtime": 12.3113, | |
| "eval_samples_per_second": 70.829, | |
| "eval_steps_per_second": 8.854, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.729166567325592, | |
| "eval_runtime": 11.2952, | |
| "eval_samples_per_second": 77.201, | |
| "eval_steps_per_second": 9.65, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 0.0008991566694381756, | |
| "loss": 0.7738, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7178325057029724, | |
| "eval_runtime": 12.4374, | |
| "eval_samples_per_second": 70.111, | |
| "eval_steps_per_second": 8.764, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.6961101293563843, | |
| "eval_runtime": 12.5193, | |
| "eval_samples_per_second": 69.652, | |
| "eval_steps_per_second": 8.707, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 0.00030526190758997505, | |
| "loss": 0.755, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7101095914840698, | |
| "eval_runtime": 12.5702, | |
| "eval_samples_per_second": 69.371, | |
| "eval_steps_per_second": 8.671, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_accuracy": 0.5091743119266054, | |
| "eval_loss": 0.7049440145492554, | |
| "eval_runtime": 11.2857, | |
| "eval_samples_per_second": 77.266, | |
| "eval_steps_per_second": 9.658, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 25257, | |
| "total_flos": 1.070008779996841e+17, | |
| "train_loss": 1.708119561667774, | |
| "train_runtime": 7791.0638, | |
| "train_samples_per_second": 25.933, | |
| "train_steps_per_second": 3.242 | |
| } | |
| ], | |
| "max_steps": 25257, | |
| "num_train_epochs": 3, | |
| "total_flos": 1.070008779996841e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |