| { | |
| "best_metric": 0.7027972027972028, | |
| "best_model_checkpoint": "wav2vec2-5Class-train-test-finetune/checkpoint-721", | |
| "epoch": 323.0769230769231, | |
| "eval_steps": 500, | |
| "global_step": 1050, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.92, | |
| "eval_accuracy": 0.34265734265734266, | |
| "eval_loss": 1.59893798828125, | |
| "eval_runtime": 4.2802, | |
| "eval_samples_per_second": 66.819, | |
| "eval_steps_per_second": 0.701, | |
| "step": 3 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "eval_accuracy": 0.34265734265734266, | |
| "eval_loss": 1.5987956523895264, | |
| "eval_runtime": 4.8166, | |
| "eval_samples_per_second": 59.378, | |
| "eval_steps_per_second": 0.623, | |
| "step": 6 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_accuracy": 0.34265734265734266, | |
| "eval_loss": 1.598555326461792, | |
| "eval_runtime": 3.989, | |
| "eval_samples_per_second": 71.697, | |
| "eval_steps_per_second": 0.752, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.34265734265734266, | |
| "eval_loss": 1.598075270652771, | |
| "eval_runtime": 4.3871, | |
| "eval_samples_per_second": 65.191, | |
| "eval_steps_per_second": 0.684, | |
| "step": 13 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "eval_accuracy": 0.3356643356643357, | |
| "eval_loss": 1.5975924730300903, | |
| "eval_runtime": 4.7955, | |
| "eval_samples_per_second": 59.639, | |
| "eval_steps_per_second": 0.626, | |
| "step": 16 | |
| }, | |
| { | |
| "epoch": 5.85, | |
| "eval_accuracy": 0.34265734265734266, | |
| "eval_loss": 1.5970256328582764, | |
| "eval_runtime": 4.4665, | |
| "eval_samples_per_second": 64.032, | |
| "eval_steps_per_second": 0.672, | |
| "step": 19 | |
| }, | |
| { | |
| "epoch": 6.77, | |
| "eval_accuracy": 0.33916083916083917, | |
| "eval_loss": 1.5963499546051025, | |
| "eval_runtime": 4.3016, | |
| "eval_samples_per_second": 66.488, | |
| "eval_steps_per_second": 0.697, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.3356643356643357, | |
| "eval_loss": 1.5952636003494263, | |
| "eval_runtime": 3.9531, | |
| "eval_samples_per_second": 72.347, | |
| "eval_steps_per_second": 0.759, | |
| "step": 26 | |
| }, | |
| { | |
| "epoch": 8.92, | |
| "eval_accuracy": 0.32867132867132864, | |
| "eval_loss": 1.594333291053772, | |
| "eval_runtime": 5.6915, | |
| "eval_samples_per_second": 50.25, | |
| "eval_steps_per_second": 0.527, | |
| "step": 29 | |
| }, | |
| { | |
| "epoch": 9.85, | |
| "eval_accuracy": 0.32867132867132864, | |
| "eval_loss": 1.5933252573013306, | |
| "eval_runtime": 4.4236, | |
| "eval_samples_per_second": 64.653, | |
| "eval_steps_per_second": 0.678, | |
| "step": 32 | |
| }, | |
| { | |
| "epoch": 10.77, | |
| "eval_accuracy": 0.32167832167832167, | |
| "eval_loss": 1.592211365699768, | |
| "eval_runtime": 4.9541, | |
| "eval_samples_per_second": 57.73, | |
| "eval_steps_per_second": 0.606, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.3181818181818182, | |
| "eval_loss": 1.5905568599700928, | |
| "eval_runtime": 5.1955, | |
| "eval_samples_per_second": 55.047, | |
| "eval_steps_per_second": 0.577, | |
| "step": 39 | |
| }, | |
| { | |
| "epoch": 12.92, | |
| "eval_accuracy": 0.3146853146853147, | |
| "eval_loss": 1.58920156955719, | |
| "eval_runtime": 3.6236, | |
| "eval_samples_per_second": 78.926, | |
| "eval_steps_per_second": 0.828, | |
| "step": 42 | |
| }, | |
| { | |
| "epoch": 13.85, | |
| "eval_accuracy": 0.3006993006993007, | |
| "eval_loss": 1.5877453088760376, | |
| "eval_runtime": 4.348, | |
| "eval_samples_per_second": 65.778, | |
| "eval_steps_per_second": 0.69, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 14.77, | |
| "eval_accuracy": 0.2937062937062937, | |
| "eval_loss": 1.5862104892730713, | |
| "eval_runtime": 4.6902, | |
| "eval_samples_per_second": 60.978, | |
| "eval_steps_per_second": 0.64, | |
| "step": 48 | |
| }, | |
| { | |
| "epoch": 15.38, | |
| "grad_norm": 65952.0234375, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 1.5907, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.2972027972027972, | |
| "eval_loss": 1.5840750932693481, | |
| "eval_runtime": 4.547, | |
| "eval_samples_per_second": 62.899, | |
| "eval_steps_per_second": 0.66, | |
| "step": 52 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "eval_accuracy": 0.28321678321678323, | |
| "eval_loss": 1.5823713541030884, | |
| "eval_runtime": 5.3625, | |
| "eval_samples_per_second": 53.334, | |
| "eval_steps_per_second": 0.559, | |
| "step": 55 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "eval_accuracy": 0.27972027972027974, | |
| "eval_loss": 1.5806101560592651, | |
| "eval_runtime": 4.7671, | |
| "eval_samples_per_second": 59.995, | |
| "eval_steps_per_second": 0.629, | |
| "step": 58 | |
| }, | |
| { | |
| "epoch": 18.77, | |
| "eval_accuracy": 0.2692307692307692, | |
| "eval_loss": 1.5787912607192993, | |
| "eval_runtime": 4.3086, | |
| "eval_samples_per_second": 66.378, | |
| "eval_steps_per_second": 0.696, | |
| "step": 61 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.2692307692307692, | |
| "eval_loss": 1.576175332069397, | |
| "eval_runtime": 5.3175, | |
| "eval_samples_per_second": 53.784, | |
| "eval_steps_per_second": 0.564, | |
| "step": 65 | |
| }, | |
| { | |
| "epoch": 20.92, | |
| "eval_accuracy": 0.26573426573426573, | |
| "eval_loss": 1.5740149021148682, | |
| "eval_runtime": 4.5172, | |
| "eval_samples_per_second": 63.314, | |
| "eval_steps_per_second": 0.664, | |
| "step": 68 | |
| }, | |
| { | |
| "epoch": 21.85, | |
| "eval_accuracy": 0.25524475524475526, | |
| "eval_loss": 1.5717105865478516, | |
| "eval_runtime": 3.9011, | |
| "eval_samples_per_second": 73.312, | |
| "eval_steps_per_second": 0.769, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 22.77, | |
| "eval_accuracy": 0.2517482517482518, | |
| "eval_loss": 1.5693939924240112, | |
| "eval_runtime": 3.9307, | |
| "eval_samples_per_second": 72.76, | |
| "eval_steps_per_second": 0.763, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.23776223776223776, | |
| "eval_loss": 1.566083312034607, | |
| "eval_runtime": 3.7134, | |
| "eval_samples_per_second": 77.019, | |
| "eval_steps_per_second": 0.808, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 24.92, | |
| "eval_accuracy": 0.23426573426573427, | |
| "eval_loss": 1.5634570121765137, | |
| "eval_runtime": 4.5234, | |
| "eval_samples_per_second": 63.226, | |
| "eval_steps_per_second": 0.663, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 25.85, | |
| "eval_accuracy": 0.22377622377622378, | |
| "eval_loss": 1.5608404874801636, | |
| "eval_runtime": 4.4129, | |
| "eval_samples_per_second": 64.81, | |
| "eval_steps_per_second": 0.68, | |
| "step": 84 | |
| }, | |
| { | |
| "epoch": 26.77, | |
| "eval_accuracy": 0.22377622377622378, | |
| "eval_loss": 1.5581375360488892, | |
| "eval_runtime": 4.7168, | |
| "eval_samples_per_second": 60.635, | |
| "eval_steps_per_second": 0.636, | |
| "step": 87 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5542311668395996, | |
| "eval_runtime": 5.4736, | |
| "eval_samples_per_second": 52.251, | |
| "eval_steps_per_second": 0.548, | |
| "step": 91 | |
| }, | |
| { | |
| "epoch": 28.92, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5511480569839478, | |
| "eval_runtime": 5.6532, | |
| "eval_samples_per_second": 50.591, | |
| "eval_steps_per_second": 0.531, | |
| "step": 94 | |
| }, | |
| { | |
| "epoch": 29.85, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5479341745376587, | |
| "eval_runtime": 5.2852, | |
| "eval_samples_per_second": 54.113, | |
| "eval_steps_per_second": 0.568, | |
| "step": 97 | |
| }, | |
| { | |
| "epoch": 30.77, | |
| "grad_norm": 68930.8125, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 1.5431, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 30.77, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5448040962219238, | |
| "eval_runtime": 4.6157, | |
| "eval_samples_per_second": 61.962, | |
| "eval_steps_per_second": 0.65, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5407565832138062, | |
| "eval_runtime": 6.2131, | |
| "eval_samples_per_second": 46.032, | |
| "eval_steps_per_second": 0.483, | |
| "step": 104 | |
| }, | |
| { | |
| "epoch": 32.92, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5379865169525146, | |
| "eval_runtime": 4.645, | |
| "eval_samples_per_second": 61.571, | |
| "eval_steps_per_second": 0.646, | |
| "step": 107 | |
| }, | |
| { | |
| "epoch": 33.85, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5359200239181519, | |
| "eval_runtime": 5.5884, | |
| "eval_samples_per_second": 51.178, | |
| "eval_steps_per_second": 0.537, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 34.77, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5345218181610107, | |
| "eval_runtime": 4.5718, | |
| "eval_samples_per_second": 62.557, | |
| "eval_steps_per_second": 0.656, | |
| "step": 113 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5334985256195068, | |
| "eval_runtime": 5.3526, | |
| "eval_samples_per_second": 53.432, | |
| "eval_steps_per_second": 0.56, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 36.92, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5340909957885742, | |
| "eval_runtime": 4.471, | |
| "eval_samples_per_second": 63.967, | |
| "eval_steps_per_second": 0.671, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 37.85, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5361381769180298, | |
| "eval_runtime": 3.5623, | |
| "eval_samples_per_second": 80.286, | |
| "eval_steps_per_second": 0.842, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 38.77, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5397439002990723, | |
| "eval_runtime": 4.9023, | |
| "eval_samples_per_second": 58.34, | |
| "eval_steps_per_second": 0.612, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5478534698486328, | |
| "eval_runtime": 3.7352, | |
| "eval_samples_per_second": 76.569, | |
| "eval_steps_per_second": 0.803, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 40.92, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5564229488372803, | |
| "eval_runtime": 4.3225, | |
| "eval_samples_per_second": 66.166, | |
| "eval_steps_per_second": 0.694, | |
| "step": 133 | |
| }, | |
| { | |
| "epoch": 41.85, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5678777694702148, | |
| "eval_runtime": 4.6076, | |
| "eval_samples_per_second": 62.072, | |
| "eval_steps_per_second": 0.651, | |
| "step": 136 | |
| }, | |
| { | |
| "epoch": 42.77, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.5821971893310547, | |
| "eval_runtime": 4.2697, | |
| "eval_samples_per_second": 66.983, | |
| "eval_steps_per_second": 0.703, | |
| "step": 139 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.6002099514007568, | |
| "eval_runtime": 4.533, | |
| "eval_samples_per_second": 63.094, | |
| "eval_steps_per_second": 0.662, | |
| "step": 143 | |
| }, | |
| { | |
| "epoch": 44.92, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.6109449863433838, | |
| "eval_runtime": 3.9799, | |
| "eval_samples_per_second": 71.861, | |
| "eval_steps_per_second": 0.754, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 45.85, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.6145771741867065, | |
| "eval_runtime": 4.3613, | |
| "eval_samples_per_second": 65.576, | |
| "eval_steps_per_second": 0.688, | |
| "step": 149 | |
| }, | |
| { | |
| "epoch": 46.15, | |
| "grad_norm": 45833.69921875, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 1.4033, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 46.77, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.6130825281143188, | |
| "eval_runtime": 4.2963, | |
| "eval_samples_per_second": 66.568, | |
| "eval_steps_per_second": 0.698, | |
| "step": 152 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.22727272727272727, | |
| "eval_loss": 1.6008453369140625, | |
| "eval_runtime": 4.063, | |
| "eval_samples_per_second": 70.391, | |
| "eval_steps_per_second": 0.738, | |
| "step": 156 | |
| }, | |
| { | |
| "epoch": 48.92, | |
| "eval_accuracy": 0.24125874125874125, | |
| "eval_loss": 1.586226224899292, | |
| "eval_runtime": 4.5029, | |
| "eval_samples_per_second": 63.515, | |
| "eval_steps_per_second": 0.666, | |
| "step": 159 | |
| }, | |
| { | |
| "epoch": 49.85, | |
| "eval_accuracy": 0.2692307692307692, | |
| "eval_loss": 1.572645902633667, | |
| "eval_runtime": 5.0597, | |
| "eval_samples_per_second": 56.525, | |
| "eval_steps_per_second": 0.593, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 50.77, | |
| "eval_accuracy": 0.2692307692307692, | |
| "eval_loss": 1.559901237487793, | |
| "eval_runtime": 4.4174, | |
| "eval_samples_per_second": 64.744, | |
| "eval_steps_per_second": 0.679, | |
| "step": 165 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.2867132867132867, | |
| "eval_loss": 1.5458828210830688, | |
| "eval_runtime": 4.357, | |
| "eval_samples_per_second": 65.642, | |
| "eval_steps_per_second": 0.689, | |
| "step": 169 | |
| }, | |
| { | |
| "epoch": 52.92, | |
| "eval_accuracy": 0.2937062937062937, | |
| "eval_loss": 1.5382803678512573, | |
| "eval_runtime": 5.6394, | |
| "eval_samples_per_second": 50.714, | |
| "eval_steps_per_second": 0.532, | |
| "step": 172 | |
| }, | |
| { | |
| "epoch": 53.85, | |
| "eval_accuracy": 0.3146853146853147, | |
| "eval_loss": 1.5310516357421875, | |
| "eval_runtime": 4.4695, | |
| "eval_samples_per_second": 63.989, | |
| "eval_steps_per_second": 0.671, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 54.77, | |
| "eval_accuracy": 0.32517482517482516, | |
| "eval_loss": 1.5242317914962769, | |
| "eval_runtime": 3.8554, | |
| "eval_samples_per_second": 74.181, | |
| "eval_steps_per_second": 0.778, | |
| "step": 178 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.3356643356643357, | |
| "eval_loss": 1.5169461965560913, | |
| "eval_runtime": 3.9817, | |
| "eval_samples_per_second": 71.828, | |
| "eval_steps_per_second": 0.753, | |
| "step": 182 | |
| }, | |
| { | |
| "epoch": 56.92, | |
| "eval_accuracy": 0.34265734265734266, | |
| "eval_loss": 1.5103094577789307, | |
| "eval_runtime": 3.9287, | |
| "eval_samples_per_second": 72.797, | |
| "eval_steps_per_second": 0.764, | |
| "step": 185 | |
| }, | |
| { | |
| "epoch": 57.85, | |
| "eval_accuracy": 0.34615384615384615, | |
| "eval_loss": 1.5055506229400635, | |
| "eval_runtime": 4.3922, | |
| "eval_samples_per_second": 65.115, | |
| "eval_steps_per_second": 0.683, | |
| "step": 188 | |
| }, | |
| { | |
| "epoch": 58.77, | |
| "eval_accuracy": 0.34615384615384615, | |
| "eval_loss": 1.4995349645614624, | |
| "eval_runtime": 4.2261, | |
| "eval_samples_per_second": 67.675, | |
| "eval_steps_per_second": 0.71, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.34965034965034963, | |
| "eval_loss": 1.4939184188842773, | |
| "eval_runtime": 3.9946, | |
| "eval_samples_per_second": 71.597, | |
| "eval_steps_per_second": 0.751, | |
| "step": 195 | |
| }, | |
| { | |
| "epoch": 60.92, | |
| "eval_accuracy": 0.36013986013986016, | |
| "eval_loss": 1.4870301485061646, | |
| "eval_runtime": 4.7123, | |
| "eval_samples_per_second": 60.693, | |
| "eval_steps_per_second": 0.637, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 61.54, | |
| "grad_norm": 27324.4609375, | |
| "learning_rate": 2.6984126984126984e-05, | |
| "loss": 1.2485, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 61.85, | |
| "eval_accuracy": 0.36713286713286714, | |
| "eval_loss": 1.4828742742538452, | |
| "eval_runtime": 4.8484, | |
| "eval_samples_per_second": 58.989, | |
| "eval_steps_per_second": 0.619, | |
| "step": 201 | |
| }, | |
| { | |
| "epoch": 62.77, | |
| "eval_accuracy": 0.3741258741258741, | |
| "eval_loss": 1.4735387563705444, | |
| "eval_runtime": 4.203, | |
| "eval_samples_per_second": 68.047, | |
| "eval_steps_per_second": 0.714, | |
| "step": 204 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.3811188811188811, | |
| "eval_loss": 1.4612373113632202, | |
| "eval_runtime": 4.6341, | |
| "eval_samples_per_second": 61.716, | |
| "eval_steps_per_second": 0.647, | |
| "step": 208 | |
| }, | |
| { | |
| "epoch": 64.92, | |
| "eval_accuracy": 0.3986013986013986, | |
| "eval_loss": 1.4491915702819824, | |
| "eval_runtime": 3.9863, | |
| "eval_samples_per_second": 71.745, | |
| "eval_steps_per_second": 0.753, | |
| "step": 211 | |
| }, | |
| { | |
| "epoch": 65.85, | |
| "eval_accuracy": 0.4125874125874126, | |
| "eval_loss": 1.4364999532699585, | |
| "eval_runtime": 4.1321, | |
| "eval_samples_per_second": 69.214, | |
| "eval_steps_per_second": 0.726, | |
| "step": 214 | |
| }, | |
| { | |
| "epoch": 66.77, | |
| "eval_accuracy": 0.4230769230769231, | |
| "eval_loss": 1.4226809740066528, | |
| "eval_runtime": 4.2397, | |
| "eval_samples_per_second": 67.458, | |
| "eval_steps_per_second": 0.708, | |
| "step": 217 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.43356643356643354, | |
| "eval_loss": 1.4095807075500488, | |
| "eval_runtime": 3.8645, | |
| "eval_samples_per_second": 74.007, | |
| "eval_steps_per_second": 0.776, | |
| "step": 221 | |
| }, | |
| { | |
| "epoch": 68.92, | |
| "eval_accuracy": 0.4370629370629371, | |
| "eval_loss": 1.4010183811187744, | |
| "eval_runtime": 4.5348, | |
| "eval_samples_per_second": 63.068, | |
| "eval_steps_per_second": 0.662, | |
| "step": 224 | |
| }, | |
| { | |
| "epoch": 69.85, | |
| "eval_accuracy": 0.4405594405594406, | |
| "eval_loss": 1.3949679136276245, | |
| "eval_runtime": 4.4414, | |
| "eval_samples_per_second": 64.394, | |
| "eval_steps_per_second": 0.675, | |
| "step": 227 | |
| }, | |
| { | |
| "epoch": 70.77, | |
| "eval_accuracy": 0.4370629370629371, | |
| "eval_loss": 1.3919552564620972, | |
| "eval_runtime": 4.3028, | |
| "eval_samples_per_second": 66.468, | |
| "eval_steps_per_second": 0.697, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.4405594405594406, | |
| "eval_loss": 1.3798925876617432, | |
| "eval_runtime": 3.4387, | |
| "eval_samples_per_second": 83.17, | |
| "eval_steps_per_second": 0.872, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 72.92, | |
| "eval_accuracy": 0.44755244755244755, | |
| "eval_loss": 1.366864800453186, | |
| "eval_runtime": 4.6503, | |
| "eval_samples_per_second": 61.502, | |
| "eval_steps_per_second": 0.645, | |
| "step": 237 | |
| }, | |
| { | |
| "epoch": 73.85, | |
| "eval_accuracy": 0.45454545454545453, | |
| "eval_loss": 1.3514918088912964, | |
| "eval_runtime": 4.5609, | |
| "eval_samples_per_second": 62.707, | |
| "eval_steps_per_second": 0.658, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 74.77, | |
| "eval_accuracy": 0.47202797202797203, | |
| "eval_loss": 1.3400850296020508, | |
| "eval_runtime": 3.8017, | |
| "eval_samples_per_second": 75.229, | |
| "eval_steps_per_second": 0.789, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.4825174825174825, | |
| "eval_loss": 1.3286209106445312, | |
| "eval_runtime": 5.7477, | |
| "eval_samples_per_second": 49.759, | |
| "eval_steps_per_second": 0.522, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 76.92, | |
| "grad_norm": 23198.236328125, | |
| "learning_rate": 2.5396825396825397e-05, | |
| "loss": 1.1198, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 76.92, | |
| "eval_accuracy": 0.486013986013986, | |
| "eval_loss": 1.317462682723999, | |
| "eval_runtime": 4.5266, | |
| "eval_samples_per_second": 63.182, | |
| "eval_steps_per_second": 0.663, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 77.85, | |
| "eval_accuracy": 0.48951048951048953, | |
| "eval_loss": 1.3067171573638916, | |
| "eval_runtime": 3.882, | |
| "eval_samples_per_second": 73.673, | |
| "eval_steps_per_second": 0.773, | |
| "step": 253 | |
| }, | |
| { | |
| "epoch": 78.77, | |
| "eval_accuracy": 0.4825174825174825, | |
| "eval_loss": 1.3013015985488892, | |
| "eval_runtime": 4.0902, | |
| "eval_samples_per_second": 69.923, | |
| "eval_steps_per_second": 0.733, | |
| "step": 256 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.479020979020979, | |
| "eval_loss": 1.2954434156417847, | |
| "eval_runtime": 5.4081, | |
| "eval_samples_per_second": 52.884, | |
| "eval_steps_per_second": 0.555, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 80.92, | |
| "eval_accuracy": 0.486013986013986, | |
| "eval_loss": 1.289677381515503, | |
| "eval_runtime": 4.384, | |
| "eval_samples_per_second": 65.238, | |
| "eval_steps_per_second": 0.684, | |
| "step": 263 | |
| }, | |
| { | |
| "epoch": 81.85, | |
| "eval_accuracy": 0.486013986013986, | |
| "eval_loss": 1.283199667930603, | |
| "eval_runtime": 4.3325, | |
| "eval_samples_per_second": 66.013, | |
| "eval_steps_per_second": 0.692, | |
| "step": 266 | |
| }, | |
| { | |
| "epoch": 82.77, | |
| "eval_accuracy": 0.4825174825174825, | |
| "eval_loss": 1.2712346315383911, | |
| "eval_runtime": 4.6039, | |
| "eval_samples_per_second": 62.121, | |
| "eval_steps_per_second": 0.652, | |
| "step": 269 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.493006993006993, | |
| "eval_loss": 1.2584125995635986, | |
| "eval_runtime": 4.5791, | |
| "eval_samples_per_second": 62.458, | |
| "eval_steps_per_second": 0.655, | |
| "step": 273 | |
| }, | |
| { | |
| "epoch": 84.92, | |
| "eval_accuracy": 0.4965034965034965, | |
| "eval_loss": 1.2516244649887085, | |
| "eval_runtime": 4.8825, | |
| "eval_samples_per_second": 58.577, | |
| "eval_steps_per_second": 0.614, | |
| "step": 276 | |
| }, | |
| { | |
| "epoch": 85.85, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.2455971240997314, | |
| "eval_runtime": 3.9744, | |
| "eval_samples_per_second": 71.96, | |
| "eval_steps_per_second": 0.755, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 86.77, | |
| "eval_accuracy": 0.5104895104895105, | |
| "eval_loss": 1.2443982362747192, | |
| "eval_runtime": 4.5207, | |
| "eval_samples_per_second": 63.265, | |
| "eval_steps_per_second": 0.664, | |
| "step": 282 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.5104895104895105, | |
| "eval_loss": 1.2373132705688477, | |
| "eval_runtime": 5.6152, | |
| "eval_samples_per_second": 50.933, | |
| "eval_steps_per_second": 0.534, | |
| "step": 286 | |
| }, | |
| { | |
| "epoch": 88.92, | |
| "eval_accuracy": 0.513986013986014, | |
| "eval_loss": 1.2309471368789673, | |
| "eval_runtime": 4.7969, | |
| "eval_samples_per_second": 59.622, | |
| "eval_steps_per_second": 0.625, | |
| "step": 289 | |
| }, | |
| { | |
| "epoch": 89.85, | |
| "eval_accuracy": 0.5209790209790209, | |
| "eval_loss": 1.2219436168670654, | |
| "eval_runtime": 4.2518, | |
| "eval_samples_per_second": 67.266, | |
| "eval_steps_per_second": 0.706, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 90.77, | |
| "eval_accuracy": 0.5209790209790209, | |
| "eval_loss": 1.2145464420318604, | |
| "eval_runtime": 4.6368, | |
| "eval_samples_per_second": 61.68, | |
| "eval_steps_per_second": 0.647, | |
| "step": 295 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.527972027972028, | |
| "eval_loss": 1.2054263353347778, | |
| "eval_runtime": 4.2071, | |
| "eval_samples_per_second": 67.98, | |
| "eval_steps_per_second": 0.713, | |
| "step": 299 | |
| }, | |
| { | |
| "epoch": 92.31, | |
| "grad_norm": 29195.7578125, | |
| "learning_rate": 2.380952380952381e-05, | |
| "loss": 0.9915, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 92.92, | |
| "eval_accuracy": 0.534965034965035, | |
| "eval_loss": 1.1981616020202637, | |
| "eval_runtime": 4.3609, | |
| "eval_samples_per_second": 65.583, | |
| "eval_steps_per_second": 0.688, | |
| "step": 302 | |
| }, | |
| { | |
| "epoch": 93.85, | |
| "eval_accuracy": 0.5384615384615384, | |
| "eval_loss": 1.1913262605667114, | |
| "eval_runtime": 3.9073, | |
| "eval_samples_per_second": 73.197, | |
| "eval_steps_per_second": 0.768, | |
| "step": 305 | |
| }, | |
| { | |
| "epoch": 94.77, | |
| "eval_accuracy": 0.5454545454545454, | |
| "eval_loss": 1.185881495475769, | |
| "eval_runtime": 3.928, | |
| "eval_samples_per_second": 72.811, | |
| "eval_steps_per_second": 0.764, | |
| "step": 308 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.548951048951049, | |
| "eval_loss": 1.179394006729126, | |
| "eval_runtime": 4.1933, | |
| "eval_samples_per_second": 68.204, | |
| "eval_steps_per_second": 0.715, | |
| "step": 312 | |
| }, | |
| { | |
| "epoch": 96.92, | |
| "eval_accuracy": 0.5454545454545454, | |
| "eval_loss": 1.1733678579330444, | |
| "eval_runtime": 5.0205, | |
| "eval_samples_per_second": 56.967, | |
| "eval_steps_per_second": 0.598, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 97.85, | |
| "eval_accuracy": 0.5524475524475524, | |
| "eval_loss": 1.1637603044509888, | |
| "eval_runtime": 4.8886, | |
| "eval_samples_per_second": 58.503, | |
| "eval_steps_per_second": 0.614, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 98.77, | |
| "eval_accuracy": 0.5524475524475524, | |
| "eval_loss": 1.1549575328826904, | |
| "eval_runtime": 4.9266, | |
| "eval_samples_per_second": 58.052, | |
| "eval_steps_per_second": 0.609, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.548951048951049, | |
| "eval_loss": 1.1464989185333252, | |
| "eval_runtime": 4.7642, | |
| "eval_samples_per_second": 60.032, | |
| "eval_steps_per_second": 0.63, | |
| "step": 325 | |
| }, | |
| { | |
| "epoch": 100.92, | |
| "eval_accuracy": 0.5594405594405595, | |
| "eval_loss": 1.1443748474121094, | |
| "eval_runtime": 4.7025, | |
| "eval_samples_per_second": 60.819, | |
| "eval_steps_per_second": 0.638, | |
| "step": 328 | |
| }, | |
| { | |
| "epoch": 101.85, | |
| "eval_accuracy": 0.5629370629370629, | |
| "eval_loss": 1.1359333992004395, | |
| "eval_runtime": 4.6342, | |
| "eval_samples_per_second": 61.715, | |
| "eval_steps_per_second": 0.647, | |
| "step": 331 | |
| }, | |
| { | |
| "epoch": 102.77, | |
| "eval_accuracy": 0.5664335664335665, | |
| "eval_loss": 1.1271060705184937, | |
| "eval_runtime": 4.4245, | |
| "eval_samples_per_second": 64.639, | |
| "eval_steps_per_second": 0.678, | |
| "step": 334 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "eval_accuracy": 0.5769230769230769, | |
| "eval_loss": 1.109040379524231, | |
| "eval_runtime": 4.9047, | |
| "eval_samples_per_second": 58.311, | |
| "eval_steps_per_second": 0.612, | |
| "step": 338 | |
| }, | |
| { | |
| "epoch": 104.92, | |
| "eval_accuracy": 0.5944055944055944, | |
| "eval_loss": 1.0972033739089966, | |
| "eval_runtime": 4.5473, | |
| "eval_samples_per_second": 62.895, | |
| "eval_steps_per_second": 0.66, | |
| "step": 341 | |
| }, | |
| { | |
| "epoch": 105.85, | |
| "eval_accuracy": 0.6013986013986014, | |
| "eval_loss": 1.090105414390564, | |
| "eval_runtime": 3.7875, | |
| "eval_samples_per_second": 75.511, | |
| "eval_steps_per_second": 0.792, | |
| "step": 344 | |
| }, | |
| { | |
| "epoch": 106.77, | |
| "eval_accuracy": 0.6083916083916084, | |
| "eval_loss": 1.0809463262557983, | |
| "eval_runtime": 4.7656, | |
| "eval_samples_per_second": 60.014, | |
| "eval_steps_per_second": 0.63, | |
| "step": 347 | |
| }, | |
| { | |
| "epoch": 107.69, | |
| "grad_norm": 32308.33984375, | |
| "learning_rate": 2.222222222222222e-05, | |
| "loss": 0.8834, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 108.0, | |
| "eval_accuracy": 0.6118881118881119, | |
| "eval_loss": 1.0683268308639526, | |
| "eval_runtime": 4.3145, | |
| "eval_samples_per_second": 66.288, | |
| "eval_steps_per_second": 0.695, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 108.92, | |
| "eval_accuracy": 0.6223776223776224, | |
| "eval_loss": 1.0605404376983643, | |
| "eval_runtime": 4.6097, | |
| "eval_samples_per_second": 62.043, | |
| "eval_steps_per_second": 0.651, | |
| "step": 354 | |
| }, | |
| { | |
| "epoch": 109.85, | |
| "eval_accuracy": 0.6258741258741258, | |
| "eval_loss": 1.0562984943389893, | |
| "eval_runtime": 4.859, | |
| "eval_samples_per_second": 58.86, | |
| "eval_steps_per_second": 0.617, | |
| "step": 357 | |
| }, | |
| { | |
| "epoch": 110.77, | |
| "eval_accuracy": 0.6223776223776224, | |
| "eval_loss": 1.0537959337234497, | |
| "eval_runtime": 4.948, | |
| "eval_samples_per_second": 57.801, | |
| "eval_steps_per_second": 0.606, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 112.0, | |
| "eval_accuracy": 0.6153846153846154, | |
| "eval_loss": 1.0491102933883667, | |
| "eval_runtime": 4.1434, | |
| "eval_samples_per_second": 69.026, | |
| "eval_steps_per_second": 0.724, | |
| "step": 364 | |
| }, | |
| { | |
| "epoch": 112.92, | |
| "eval_accuracy": 0.6118881118881119, | |
| "eval_loss": 1.044057011604309, | |
| "eval_runtime": 4.3774, | |
| "eval_samples_per_second": 65.336, | |
| "eval_steps_per_second": 0.685, | |
| "step": 367 | |
| }, | |
| { | |
| "epoch": 113.85, | |
| "eval_accuracy": 0.6118881118881119, | |
| "eval_loss": 1.0357924699783325, | |
| "eval_runtime": 4.7038, | |
| "eval_samples_per_second": 60.801, | |
| "eval_steps_per_second": 0.638, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 114.77, | |
| "eval_accuracy": 0.6223776223776224, | |
| "eval_loss": 1.0194157361984253, | |
| "eval_runtime": 5.0902, | |
| "eval_samples_per_second": 56.187, | |
| "eval_steps_per_second": 0.589, | |
| "step": 373 | |
| }, | |
| { | |
| "epoch": 116.0, | |
| "eval_accuracy": 0.6293706293706294, | |
| "eval_loss": 1.0034115314483643, | |
| "eval_runtime": 4.386, | |
| "eval_samples_per_second": 65.208, | |
| "eval_steps_per_second": 0.684, | |
| "step": 377 | |
| }, | |
| { | |
| "epoch": 116.92, | |
| "eval_accuracy": 0.6258741258741258, | |
| "eval_loss": 0.9991269707679749, | |
| "eval_runtime": 5.2708, | |
| "eval_samples_per_second": 54.261, | |
| "eval_steps_per_second": 0.569, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 117.85, | |
| "eval_accuracy": 0.6258741258741258, | |
| "eval_loss": 0.9959561824798584, | |
| "eval_runtime": 4.7556, | |
| "eval_samples_per_second": 60.139, | |
| "eval_steps_per_second": 0.631, | |
| "step": 383 | |
| }, | |
| { | |
| "epoch": 118.77, | |
| "eval_accuracy": 0.6293706293706294, | |
| "eval_loss": 0.9911425113677979, | |
| "eval_runtime": 4.0817, | |
| "eval_samples_per_second": 70.068, | |
| "eval_steps_per_second": 0.735, | |
| "step": 386 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "eval_accuracy": 0.6433566433566433, | |
| "eval_loss": 0.9834115505218506, | |
| "eval_runtime": 4.0058, | |
| "eval_samples_per_second": 71.396, | |
| "eval_steps_per_second": 0.749, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 120.92, | |
| "eval_accuracy": 0.6433566433566433, | |
| "eval_loss": 0.9775691628456116, | |
| "eval_runtime": 4.3856, | |
| "eval_samples_per_second": 65.214, | |
| "eval_steps_per_second": 0.684, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 121.85, | |
| "eval_accuracy": 0.6433566433566433, | |
| "eval_loss": 0.9772741198539734, | |
| "eval_runtime": 4.6976, | |
| "eval_samples_per_second": 60.882, | |
| "eval_steps_per_second": 0.639, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 122.77, | |
| "eval_accuracy": 0.6433566433566433, | |
| "eval_loss": 0.9734641909599304, | |
| "eval_runtime": 4.6506, | |
| "eval_samples_per_second": 61.498, | |
| "eval_steps_per_second": 0.645, | |
| "step": 399 | |
| }, | |
| { | |
| "epoch": 123.08, | |
| "grad_norm": 27630.990234375, | |
| "learning_rate": 2.0634920634920633e-05, | |
| "loss": 0.7786, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 124.0, | |
| "eval_accuracy": 0.6398601398601399, | |
| "eval_loss": 0.9730696082115173, | |
| "eval_runtime": 3.9976, | |
| "eval_samples_per_second": 71.542, | |
| "eval_steps_per_second": 0.75, | |
| "step": 403 | |
| }, | |
| { | |
| "epoch": 124.92, | |
| "eval_accuracy": 0.6433566433566433, | |
| "eval_loss": 0.9727755188941956, | |
| "eval_runtime": 4.0553, | |
| "eval_samples_per_second": 70.525, | |
| "eval_steps_per_second": 0.74, | |
| "step": 406 | |
| }, | |
| { | |
| "epoch": 125.85, | |
| "eval_accuracy": 0.6573426573426573, | |
| "eval_loss": 0.9657326936721802, | |
| "eval_runtime": 4.4666, | |
| "eval_samples_per_second": 64.031, | |
| "eval_steps_per_second": 0.672, | |
| "step": 409 | |
| }, | |
| { | |
| "epoch": 126.77, | |
| "eval_accuracy": 0.6573426573426573, | |
| "eval_loss": 0.9547586441040039, | |
| "eval_runtime": 4.6999, | |
| "eval_samples_per_second": 60.852, | |
| "eval_steps_per_second": 0.638, | |
| "step": 412 | |
| }, | |
| { | |
| "epoch": 128.0, | |
| "eval_accuracy": 0.6643356643356644, | |
| "eval_loss": 0.942358136177063, | |
| "eval_runtime": 4.8438, | |
| "eval_samples_per_second": 59.045, | |
| "eval_steps_per_second": 0.619, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 128.92, | |
| "eval_accuracy": 0.6678321678321678, | |
| "eval_loss": 0.9391436576843262, | |
| "eval_runtime": 4.4506, | |
| "eval_samples_per_second": 64.261, | |
| "eval_steps_per_second": 0.674, | |
| "step": 419 | |
| }, | |
| { | |
| "epoch": 129.85, | |
| "eval_accuracy": 0.6678321678321678, | |
| "eval_loss": 0.9418392777442932, | |
| "eval_runtime": 4.2912, | |
| "eval_samples_per_second": 66.648, | |
| "eval_steps_per_second": 0.699, | |
| "step": 422 | |
| }, | |
| { | |
| "epoch": 130.77, | |
| "eval_accuracy": 0.6608391608391608, | |
| "eval_loss": 0.9476207494735718, | |
| "eval_runtime": 4.7281, | |
| "eval_samples_per_second": 60.49, | |
| "eval_steps_per_second": 0.635, | |
| "step": 425 | |
| }, | |
| { | |
| "epoch": 132.0, | |
| "eval_accuracy": 0.6643356643356644, | |
| "eval_loss": 0.9457269310951233, | |
| "eval_runtime": 4.314, | |
| "eval_samples_per_second": 66.295, | |
| "eval_steps_per_second": 0.695, | |
| "step": 429 | |
| }, | |
| { | |
| "epoch": 132.92, | |
| "eval_accuracy": 0.6643356643356644, | |
| "eval_loss": 0.941338062286377, | |
| "eval_runtime": 3.916, | |
| "eval_samples_per_second": 73.033, | |
| "eval_steps_per_second": 0.766, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 133.85, | |
| "eval_accuracy": 0.6678321678321678, | |
| "eval_loss": 0.9334166049957275, | |
| "eval_runtime": 4.5886, | |
| "eval_samples_per_second": 62.329, | |
| "eval_steps_per_second": 0.654, | |
| "step": 435 | |
| }, | |
| { | |
| "epoch": 134.77, | |
| "eval_accuracy": 0.6678321678321678, | |
| "eval_loss": 0.9328890442848206, | |
| "eval_runtime": 4.1417, | |
| "eval_samples_per_second": 69.054, | |
| "eval_steps_per_second": 0.724, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 136.0, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.9333996772766113, | |
| "eval_runtime": 4.538, | |
| "eval_samples_per_second": 63.023, | |
| "eval_steps_per_second": 0.661, | |
| "step": 442 | |
| }, | |
| { | |
| "epoch": 136.92, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.9264596700668335, | |
| "eval_runtime": 4.6642, | |
| "eval_samples_per_second": 61.318, | |
| "eval_steps_per_second": 0.643, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 137.85, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.9186587929725647, | |
| "eval_runtime": 4.5978, | |
| "eval_samples_per_second": 62.204, | |
| "eval_steps_per_second": 0.652, | |
| "step": 448 | |
| }, | |
| { | |
| "epoch": 138.46, | |
| "grad_norm": 34684.0078125, | |
| "learning_rate": 1.9047619047619046e-05, | |
| "loss": 0.7133, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 138.77, | |
| "eval_accuracy": 0.6678321678321678, | |
| "eval_loss": 0.916916012763977, | |
| "eval_runtime": 4.1718, | |
| "eval_samples_per_second": 68.556, | |
| "eval_steps_per_second": 0.719, | |
| "step": 451 | |
| }, | |
| { | |
| "epoch": 140.0, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.9141567349433899, | |
| "eval_runtime": 4.8158, | |
| "eval_samples_per_second": 59.388, | |
| "eval_steps_per_second": 0.623, | |
| "step": 455 | |
| }, | |
| { | |
| "epoch": 140.92, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.9131244421005249, | |
| "eval_runtime": 4.3984, | |
| "eval_samples_per_second": 65.024, | |
| "eval_steps_per_second": 0.682, | |
| "step": 458 | |
| }, | |
| { | |
| "epoch": 141.85, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.9160958528518677, | |
| "eval_runtime": 3.9738, | |
| "eval_samples_per_second": 71.971, | |
| "eval_steps_per_second": 0.755, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 142.77, | |
| "eval_accuracy": 0.6678321678321678, | |
| "eval_loss": 0.9223662614822388, | |
| "eval_runtime": 3.7836, | |
| "eval_samples_per_second": 75.589, | |
| "eval_steps_per_second": 0.793, | |
| "step": 464 | |
| }, | |
| { | |
| "epoch": 144.0, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.9139449000358582, | |
| "eval_runtime": 4.0554, | |
| "eval_samples_per_second": 70.522, | |
| "eval_steps_per_second": 0.74, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 144.92, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.9089756608009338, | |
| "eval_runtime": 4.4989, | |
| "eval_samples_per_second": 63.571, | |
| "eval_steps_per_second": 0.667, | |
| "step": 471 | |
| }, | |
| { | |
| "epoch": 145.85, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.9072948694229126, | |
| "eval_runtime": 3.984, | |
| "eval_samples_per_second": 71.788, | |
| "eval_steps_per_second": 0.753, | |
| "step": 474 | |
| }, | |
| { | |
| "epoch": 146.77, | |
| "eval_accuracy": 0.6608391608391608, | |
| "eval_loss": 0.9110231995582581, | |
| "eval_runtime": 4.596, | |
| "eval_samples_per_second": 62.228, | |
| "eval_steps_per_second": 0.653, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 148.0, | |
| "eval_accuracy": 0.6573426573426573, | |
| "eval_loss": 0.9167369604110718, | |
| "eval_runtime": 4.7051, | |
| "eval_samples_per_second": 60.785, | |
| "eval_steps_per_second": 0.638, | |
| "step": 481 | |
| }, | |
| { | |
| "epoch": 148.92, | |
| "eval_accuracy": 0.6643356643356644, | |
| "eval_loss": 0.9118071794509888, | |
| "eval_runtime": 3.9295, | |
| "eval_samples_per_second": 72.783, | |
| "eval_steps_per_second": 0.763, | |
| "step": 484 | |
| }, | |
| { | |
| "epoch": 149.85, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.8996461629867554, | |
| "eval_runtime": 4.5063, | |
| "eval_samples_per_second": 63.466, | |
| "eval_steps_per_second": 0.666, | |
| "step": 487 | |
| }, | |
| { | |
| "epoch": 150.77, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.8903929591178894, | |
| "eval_runtime": 4.0074, | |
| "eval_samples_per_second": 71.369, | |
| "eval_steps_per_second": 0.749, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 152.0, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.8889052867889404, | |
| "eval_runtime": 4.2482, | |
| "eval_samples_per_second": 67.323, | |
| "eval_steps_per_second": 0.706, | |
| "step": 494 | |
| }, | |
| { | |
| "epoch": 152.92, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.889894425868988, | |
| "eval_runtime": 4.7658, | |
| "eval_samples_per_second": 60.011, | |
| "eval_steps_per_second": 0.629, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 153.85, | |
| "grad_norm": 27670.865234375, | |
| "learning_rate": 1.746031746031746e-05, | |
| "loss": 0.6674, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 153.85, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.887377917766571, | |
| "eval_runtime": 4.6951, | |
| "eval_samples_per_second": 60.915, | |
| "eval_steps_per_second": 0.639, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 154.77, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.8873924016952515, | |
| "eval_runtime": 3.8042, | |
| "eval_samples_per_second": 75.181, | |
| "eval_steps_per_second": 0.789, | |
| "step": 503 | |
| }, | |
| { | |
| "epoch": 156.0, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.8905075788497925, | |
| "eval_runtime": 3.9282, | |
| "eval_samples_per_second": 72.806, | |
| "eval_steps_per_second": 0.764, | |
| "step": 507 | |
| }, | |
| { | |
| "epoch": 156.92, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8881194591522217, | |
| "eval_runtime": 4.2085, | |
| "eval_samples_per_second": 67.957, | |
| "eval_steps_per_second": 0.713, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 157.85, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.882903516292572, | |
| "eval_runtime": 5.345, | |
| "eval_samples_per_second": 53.508, | |
| "eval_steps_per_second": 0.561, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 158.77, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8809071183204651, | |
| "eval_runtime": 4.4142, | |
| "eval_samples_per_second": 64.791, | |
| "eval_steps_per_second": 0.68, | |
| "step": 516 | |
| }, | |
| { | |
| "epoch": 160.0, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8780828714370728, | |
| "eval_runtime": 3.6498, | |
| "eval_samples_per_second": 78.361, | |
| "eval_steps_per_second": 0.822, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 160.92, | |
| "eval_accuracy": 0.6818181818181818, | |
| "eval_loss": 0.8776365518569946, | |
| "eval_runtime": 3.4668, | |
| "eval_samples_per_second": 82.497, | |
| "eval_steps_per_second": 0.865, | |
| "step": 523 | |
| }, | |
| { | |
| "epoch": 161.85, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8795685768127441, | |
| "eval_runtime": 3.8004, | |
| "eval_samples_per_second": 75.256, | |
| "eval_steps_per_second": 0.789, | |
| "step": 526 | |
| }, | |
| { | |
| "epoch": 162.77, | |
| "eval_accuracy": 0.6818181818181818, | |
| "eval_loss": 0.8795468807220459, | |
| "eval_runtime": 3.8694, | |
| "eval_samples_per_second": 73.913, | |
| "eval_steps_per_second": 0.775, | |
| "step": 529 | |
| }, | |
| { | |
| "epoch": 164.0, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8797011971473694, | |
| "eval_runtime": 4.1348, | |
| "eval_samples_per_second": 69.169, | |
| "eval_steps_per_second": 0.726, | |
| "step": 533 | |
| }, | |
| { | |
| "epoch": 164.92, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8706856966018677, | |
| "eval_runtime": 4.5762, | |
| "eval_samples_per_second": 62.498, | |
| "eval_steps_per_second": 0.656, | |
| "step": 536 | |
| }, | |
| { | |
| "epoch": 165.85, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8697258830070496, | |
| "eval_runtime": 3.5794, | |
| "eval_samples_per_second": 79.901, | |
| "eval_steps_per_second": 0.838, | |
| "step": 539 | |
| }, | |
| { | |
| "epoch": 166.77, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8723975419998169, | |
| "eval_runtime": 5.761, | |
| "eval_samples_per_second": 49.644, | |
| "eval_steps_per_second": 0.521, | |
| "step": 542 | |
| }, | |
| { | |
| "epoch": 168.0, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.870445966720581, | |
| "eval_runtime": 4.2907, | |
| "eval_samples_per_second": 66.656, | |
| "eval_steps_per_second": 0.699, | |
| "step": 546 | |
| }, | |
| { | |
| "epoch": 168.92, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.8693636655807495, | |
| "eval_runtime": 4.5637, | |
| "eval_samples_per_second": 62.668, | |
| "eval_steps_per_second": 0.657, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 169.23, | |
| "grad_norm": 67537.203125, | |
| "learning_rate": 1.5873015873015872e-05, | |
| "loss": 0.6305, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 169.85, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.8739539980888367, | |
| "eval_runtime": 4.5496, | |
| "eval_samples_per_second": 62.862, | |
| "eval_steps_per_second": 0.659, | |
| "step": 552 | |
| }, | |
| { | |
| "epoch": 170.77, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.8713040947914124, | |
| "eval_runtime": 4.3907, | |
| "eval_samples_per_second": 65.138, | |
| "eval_steps_per_second": 0.683, | |
| "step": 555 | |
| }, | |
| { | |
| "epoch": 172.0, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8682331442832947, | |
| "eval_runtime": 4.1777, | |
| "eval_samples_per_second": 68.459, | |
| "eval_steps_per_second": 0.718, | |
| "step": 559 | |
| }, | |
| { | |
| "epoch": 172.92, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.868798553943634, | |
| "eval_runtime": 3.5218, | |
| "eval_samples_per_second": 81.207, | |
| "eval_steps_per_second": 0.852, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 173.85, | |
| "eval_accuracy": 0.6818181818181818, | |
| "eval_loss": 0.8692768216133118, | |
| "eval_runtime": 5.0064, | |
| "eval_samples_per_second": 57.127, | |
| "eval_steps_per_second": 0.599, | |
| "step": 565 | |
| }, | |
| { | |
| "epoch": 174.77, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.874369204044342, | |
| "eval_runtime": 4.1257, | |
| "eval_samples_per_second": 69.322, | |
| "eval_steps_per_second": 0.727, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 176.0, | |
| "eval_accuracy": 0.6783216783216783, | |
| "eval_loss": 0.8759630918502808, | |
| "eval_runtime": 4.4848, | |
| "eval_samples_per_second": 63.771, | |
| "eval_steps_per_second": 0.669, | |
| "step": 572 | |
| }, | |
| { | |
| "epoch": 176.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.8696449398994446, | |
| "eval_runtime": 4.1683, | |
| "eval_samples_per_second": 68.613, | |
| "eval_steps_per_second": 0.72, | |
| "step": 575 | |
| }, | |
| { | |
| "epoch": 177.85, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.8668593764305115, | |
| "eval_runtime": 4.3889, | |
| "eval_samples_per_second": 65.165, | |
| "eval_steps_per_second": 0.684, | |
| "step": 578 | |
| }, | |
| { | |
| "epoch": 178.77, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.8641146421432495, | |
| "eval_runtime": 4.0742, | |
| "eval_samples_per_second": 70.197, | |
| "eval_steps_per_second": 0.736, | |
| "step": 581 | |
| }, | |
| { | |
| "epoch": 180.0, | |
| "eval_accuracy": 0.6713286713286714, | |
| "eval_loss": 0.8696537613868713, | |
| "eval_runtime": 4.1345, | |
| "eval_samples_per_second": 69.173, | |
| "eval_steps_per_second": 0.726, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 180.92, | |
| "eval_accuracy": 0.6748251748251748, | |
| "eval_loss": 0.8678367733955383, | |
| "eval_runtime": 3.994, | |
| "eval_samples_per_second": 71.607, | |
| "eval_steps_per_second": 0.751, | |
| "step": 588 | |
| }, | |
| { | |
| "epoch": 181.85, | |
| "eval_accuracy": 0.6818181818181818, | |
| "eval_loss": 0.8620542287826538, | |
| "eval_runtime": 4.32, | |
| "eval_samples_per_second": 66.204, | |
| "eval_steps_per_second": 0.694, | |
| "step": 591 | |
| }, | |
| { | |
| "epoch": 182.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8557011485099792, | |
| "eval_runtime": 4.7717, | |
| "eval_samples_per_second": 59.937, | |
| "eval_steps_per_second": 0.629, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 184.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.848114013671875, | |
| "eval_runtime": 4.0948, | |
| "eval_samples_per_second": 69.845, | |
| "eval_steps_per_second": 0.733, | |
| "step": 598 | |
| }, | |
| { | |
| "epoch": 184.62, | |
| "grad_norm": 36502.2421875, | |
| "learning_rate": 1.4285714285714285e-05, | |
| "loss": 0.6095, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 184.92, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8428906798362732, | |
| "eval_runtime": 4.6887, | |
| "eval_samples_per_second": 60.997, | |
| "eval_steps_per_second": 0.64, | |
| "step": 601 | |
| }, | |
| { | |
| "epoch": 185.85, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8413122892379761, | |
| "eval_runtime": 3.8998, | |
| "eval_samples_per_second": 73.337, | |
| "eval_steps_per_second": 0.769, | |
| "step": 604 | |
| }, | |
| { | |
| "epoch": 186.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8402045965194702, | |
| "eval_runtime": 4.1508, | |
| "eval_samples_per_second": 68.903, | |
| "eval_steps_per_second": 0.723, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 188.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8415275812149048, | |
| "eval_runtime": 4.4966, | |
| "eval_samples_per_second": 63.603, | |
| "eval_steps_per_second": 0.667, | |
| "step": 611 | |
| }, | |
| { | |
| "epoch": 188.92, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8409523963928223, | |
| "eval_runtime": 4.0007, | |
| "eval_samples_per_second": 71.488, | |
| "eval_steps_per_second": 0.75, | |
| "step": 614 | |
| }, | |
| { | |
| "epoch": 189.85, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.8388563394546509, | |
| "eval_runtime": 4.5212, | |
| "eval_samples_per_second": 63.257, | |
| "eval_steps_per_second": 0.664, | |
| "step": 617 | |
| }, | |
| { | |
| "epoch": 190.77, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.8353860378265381, | |
| "eval_runtime": 4.6112, | |
| "eval_samples_per_second": 62.023, | |
| "eval_steps_per_second": 0.651, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 192.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8356983661651611, | |
| "eval_runtime": 4.6563, | |
| "eval_samples_per_second": 61.422, | |
| "eval_steps_per_second": 0.644, | |
| "step": 624 | |
| }, | |
| { | |
| "epoch": 192.92, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8400572538375854, | |
| "eval_runtime": 5.369, | |
| "eval_samples_per_second": 53.269, | |
| "eval_steps_per_second": 0.559, | |
| "step": 627 | |
| }, | |
| { | |
| "epoch": 193.85, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.844892144203186, | |
| "eval_runtime": 4.0956, | |
| "eval_samples_per_second": 69.831, | |
| "eval_steps_per_second": 0.732, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 194.77, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8478845357894897, | |
| "eval_runtime": 4.6385, | |
| "eval_samples_per_second": 61.658, | |
| "eval_steps_per_second": 0.647, | |
| "step": 633 | |
| }, | |
| { | |
| "epoch": 196.0, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8454630374908447, | |
| "eval_runtime": 4.4423, | |
| "eval_samples_per_second": 64.381, | |
| "eval_steps_per_second": 0.675, | |
| "step": 637 | |
| }, | |
| { | |
| "epoch": 196.92, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8421822190284729, | |
| "eval_runtime": 3.8632, | |
| "eval_samples_per_second": 74.032, | |
| "eval_steps_per_second": 0.777, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 197.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8425044417381287, | |
| "eval_runtime": 5.1031, | |
| "eval_samples_per_second": 56.044, | |
| "eval_steps_per_second": 0.588, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 198.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8436546325683594, | |
| "eval_runtime": 4.9685, | |
| "eval_samples_per_second": 57.562, | |
| "eval_steps_per_second": 0.604, | |
| "step": 646 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "grad_norm": 66285.84375, | |
| "learning_rate": 1.2698412698412699e-05, | |
| "loss": 0.5908, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8366544246673584, | |
| "eval_runtime": 4.3292, | |
| "eval_samples_per_second": 66.063, | |
| "eval_steps_per_second": 0.693, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 200.92, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.834704577922821, | |
| "eval_runtime": 4.7887, | |
| "eval_samples_per_second": 59.724, | |
| "eval_steps_per_second": 0.626, | |
| "step": 653 | |
| }, | |
| { | |
| "epoch": 201.85, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8286824226379395, | |
| "eval_runtime": 4.388, | |
| "eval_samples_per_second": 65.178, | |
| "eval_steps_per_second": 0.684, | |
| "step": 656 | |
| }, | |
| { | |
| "epoch": 202.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8259890079498291, | |
| "eval_runtime": 3.7365, | |
| "eval_samples_per_second": 76.543, | |
| "eval_steps_per_second": 0.803, | |
| "step": 659 | |
| }, | |
| { | |
| "epoch": 204.0, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8263576626777649, | |
| "eval_runtime": 4.9175, | |
| "eval_samples_per_second": 58.159, | |
| "eval_steps_per_second": 0.61, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 204.92, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8295235633850098, | |
| "eval_runtime": 4.3071, | |
| "eval_samples_per_second": 66.401, | |
| "eval_steps_per_second": 0.697, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 205.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8301726579666138, | |
| "eval_runtime": 3.7499, | |
| "eval_samples_per_second": 76.268, | |
| "eval_steps_per_second": 0.8, | |
| "step": 669 | |
| }, | |
| { | |
| "epoch": 206.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.828461766242981, | |
| "eval_runtime": 3.8022, | |
| "eval_samples_per_second": 75.219, | |
| "eval_steps_per_second": 0.789, | |
| "step": 672 | |
| }, | |
| { | |
| "epoch": 208.0, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.831078052520752, | |
| "eval_runtime": 4.2868, | |
| "eval_samples_per_second": 66.716, | |
| "eval_steps_per_second": 0.7, | |
| "step": 676 | |
| }, | |
| { | |
| "epoch": 208.92, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8320910334587097, | |
| "eval_runtime": 4.474, | |
| "eval_samples_per_second": 63.925, | |
| "eval_steps_per_second": 0.671, | |
| "step": 679 | |
| }, | |
| { | |
| "epoch": 209.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8305550813674927, | |
| "eval_runtime": 4.1246, | |
| "eval_samples_per_second": 69.341, | |
| "eval_steps_per_second": 0.727, | |
| "step": 682 | |
| }, | |
| { | |
| "epoch": 210.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8302868604660034, | |
| "eval_runtime": 4.9131, | |
| "eval_samples_per_second": 58.212, | |
| "eval_steps_per_second": 0.611, | |
| "step": 685 | |
| }, | |
| { | |
| "epoch": 212.0, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.8256182670593262, | |
| "eval_runtime": 4.5542, | |
| "eval_samples_per_second": 62.8, | |
| "eval_steps_per_second": 0.659, | |
| "step": 689 | |
| }, | |
| { | |
| "epoch": 212.92, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8230299353599548, | |
| "eval_runtime": 4.2845, | |
| "eval_samples_per_second": 66.752, | |
| "eval_steps_per_second": 0.7, | |
| "step": 692 | |
| }, | |
| { | |
| "epoch": 213.85, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.819442868232727, | |
| "eval_runtime": 4.4153, | |
| "eval_samples_per_second": 64.775, | |
| "eval_steps_per_second": 0.679, | |
| "step": 695 | |
| }, | |
| { | |
| "epoch": 214.77, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8183168768882751, | |
| "eval_runtime": 4.9672, | |
| "eval_samples_per_second": 57.577, | |
| "eval_steps_per_second": 0.604, | |
| "step": 698 | |
| }, | |
| { | |
| "epoch": 215.38, | |
| "grad_norm": 29832.03125, | |
| "learning_rate": 1.111111111111111e-05, | |
| "loss": 0.5763, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 216.0, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8231977224349976, | |
| "eval_runtime": 4.6354, | |
| "eval_samples_per_second": 61.699, | |
| "eval_steps_per_second": 0.647, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 216.92, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8236932158470154, | |
| "eval_runtime": 3.7182, | |
| "eval_samples_per_second": 76.92, | |
| "eval_steps_per_second": 0.807, | |
| "step": 705 | |
| }, | |
| { | |
| "epoch": 217.85, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.8195610642433167, | |
| "eval_runtime": 3.5502, | |
| "eval_samples_per_second": 80.56, | |
| "eval_steps_per_second": 0.845, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 218.77, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.8142436742782593, | |
| "eval_runtime": 4.9155, | |
| "eval_samples_per_second": 58.184, | |
| "eval_steps_per_second": 0.61, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 220.0, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.8115321397781372, | |
| "eval_runtime": 4.0939, | |
| "eval_samples_per_second": 69.86, | |
| "eval_steps_per_second": 0.733, | |
| "step": 715 | |
| }, | |
| { | |
| "epoch": 220.92, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.8130100965499878, | |
| "eval_runtime": 4.2197, | |
| "eval_samples_per_second": 67.777, | |
| "eval_steps_per_second": 0.711, | |
| "step": 718 | |
| }, | |
| { | |
| "epoch": 221.85, | |
| "eval_accuracy": 0.7027972027972028, | |
| "eval_loss": 0.8156144022941589, | |
| "eval_runtime": 4.2344, | |
| "eval_samples_per_second": 67.542, | |
| "eval_steps_per_second": 0.708, | |
| "step": 721 | |
| }, | |
| { | |
| "epoch": 222.77, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8200713992118835, | |
| "eval_runtime": 4.8181, | |
| "eval_samples_per_second": 59.36, | |
| "eval_steps_per_second": 0.623, | |
| "step": 724 | |
| }, | |
| { | |
| "epoch": 224.0, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8227414488792419, | |
| "eval_runtime": 4.5671, | |
| "eval_samples_per_second": 62.621, | |
| "eval_steps_per_second": 0.657, | |
| "step": 728 | |
| }, | |
| { | |
| "epoch": 224.92, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8232228755950928, | |
| "eval_runtime": 5.221, | |
| "eval_samples_per_second": 54.779, | |
| "eval_steps_per_second": 0.575, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 225.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8198325634002686, | |
| "eval_runtime": 4.2136, | |
| "eval_samples_per_second": 67.875, | |
| "eval_steps_per_second": 0.712, | |
| "step": 734 | |
| }, | |
| { | |
| "epoch": 226.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8151125311851501, | |
| "eval_runtime": 4.8801, | |
| "eval_samples_per_second": 58.606, | |
| "eval_steps_per_second": 0.615, | |
| "step": 737 | |
| }, | |
| { | |
| "epoch": 228.0, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8136410713195801, | |
| "eval_runtime": 5.2461, | |
| "eval_samples_per_second": 54.516, | |
| "eval_steps_per_second": 0.572, | |
| "step": 741 | |
| }, | |
| { | |
| "epoch": 228.92, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8134062886238098, | |
| "eval_runtime": 3.6429, | |
| "eval_samples_per_second": 78.509, | |
| "eval_steps_per_second": 0.824, | |
| "step": 744 | |
| }, | |
| { | |
| "epoch": 229.85, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8123226761817932, | |
| "eval_runtime": 4.8374, | |
| "eval_samples_per_second": 59.122, | |
| "eval_steps_per_second": 0.62, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 230.77, | |
| "grad_norm": 27062.134765625, | |
| "learning_rate": 9.523809523809523e-06, | |
| "loss": 0.57, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 230.77, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8095433115959167, | |
| "eval_runtime": 3.9409, | |
| "eval_samples_per_second": 72.572, | |
| "eval_steps_per_second": 0.761, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 232.0, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8082302212715149, | |
| "eval_runtime": 4.0933, | |
| "eval_samples_per_second": 69.87, | |
| "eval_steps_per_second": 0.733, | |
| "step": 754 | |
| }, | |
| { | |
| "epoch": 232.92, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8084114193916321, | |
| "eval_runtime": 4.4952, | |
| "eval_samples_per_second": 63.624, | |
| "eval_steps_per_second": 0.667, | |
| "step": 757 | |
| }, | |
| { | |
| "epoch": 233.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8113557696342468, | |
| "eval_runtime": 4.6955, | |
| "eval_samples_per_second": 60.909, | |
| "eval_steps_per_second": 0.639, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 234.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8130276799201965, | |
| "eval_runtime": 4.9303, | |
| "eval_samples_per_second": 58.009, | |
| "eval_steps_per_second": 0.608, | |
| "step": 763 | |
| }, | |
| { | |
| "epoch": 236.0, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8153804540634155, | |
| "eval_runtime": 3.6663, | |
| "eval_samples_per_second": 78.007, | |
| "eval_steps_per_second": 0.818, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 236.92, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8160205483436584, | |
| "eval_runtime": 4.6226, | |
| "eval_samples_per_second": 61.87, | |
| "eval_steps_per_second": 0.649, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 237.85, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8126419186592102, | |
| "eval_runtime": 4.6278, | |
| "eval_samples_per_second": 61.801, | |
| "eval_steps_per_second": 0.648, | |
| "step": 773 | |
| }, | |
| { | |
| "epoch": 238.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8113960027694702, | |
| "eval_runtime": 3.8362, | |
| "eval_samples_per_second": 74.552, | |
| "eval_steps_per_second": 0.782, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 240.0, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8041169047355652, | |
| "eval_runtime": 5.2095, | |
| "eval_samples_per_second": 54.9, | |
| "eval_steps_per_second": 0.576, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 240.92, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8005608916282654, | |
| "eval_runtime": 4.0128, | |
| "eval_samples_per_second": 71.273, | |
| "eval_steps_per_second": 0.748, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 241.85, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.7987480163574219, | |
| "eval_runtime": 4.8789, | |
| "eval_samples_per_second": 58.619, | |
| "eval_steps_per_second": 0.615, | |
| "step": 786 | |
| }, | |
| { | |
| "epoch": 242.77, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.7977189421653748, | |
| "eval_runtime": 4.5854, | |
| "eval_samples_per_second": 62.372, | |
| "eval_steps_per_second": 0.654, | |
| "step": 789 | |
| }, | |
| { | |
| "epoch": 244.0, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.8001275658607483, | |
| "eval_runtime": 4.7528, | |
| "eval_samples_per_second": 60.175, | |
| "eval_steps_per_second": 0.631, | |
| "step": 793 | |
| }, | |
| { | |
| "epoch": 244.92, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8043994903564453, | |
| "eval_runtime": 4.2699, | |
| "eval_samples_per_second": 66.98, | |
| "eval_steps_per_second": 0.703, | |
| "step": 796 | |
| }, | |
| { | |
| "epoch": 245.85, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8082275390625, | |
| "eval_runtime": 4.2996, | |
| "eval_samples_per_second": 66.518, | |
| "eval_steps_per_second": 0.698, | |
| "step": 799 | |
| }, | |
| { | |
| "epoch": 246.15, | |
| "grad_norm": 99001.8359375, | |
| "learning_rate": 7.936507936507936e-06, | |
| "loss": 0.5456, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 246.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8120755553245544, | |
| "eval_runtime": 4.5242, | |
| "eval_samples_per_second": 63.216, | |
| "eval_steps_per_second": 0.663, | |
| "step": 802 | |
| }, | |
| { | |
| "epoch": 248.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.8106970191001892, | |
| "eval_runtime": 4.4479, | |
| "eval_samples_per_second": 64.3, | |
| "eval_steps_per_second": 0.674, | |
| "step": 806 | |
| }, | |
| { | |
| "epoch": 248.92, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.806368887424469, | |
| "eval_runtime": 4.1522, | |
| "eval_samples_per_second": 68.88, | |
| "eval_steps_per_second": 0.723, | |
| "step": 809 | |
| }, | |
| { | |
| "epoch": 249.85, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8042352199554443, | |
| "eval_runtime": 4.4213, | |
| "eval_samples_per_second": 64.687, | |
| "eval_steps_per_second": 0.679, | |
| "step": 812 | |
| }, | |
| { | |
| "epoch": 250.77, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8005724549293518, | |
| "eval_runtime": 4.4134, | |
| "eval_samples_per_second": 64.802, | |
| "eval_steps_per_second": 0.68, | |
| "step": 815 | |
| }, | |
| { | |
| "epoch": 252.0, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.7968676090240479, | |
| "eval_runtime": 3.8229, | |
| "eval_samples_per_second": 74.812, | |
| "eval_steps_per_second": 0.785, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 252.92, | |
| "eval_accuracy": 0.6993006993006993, | |
| "eval_loss": 0.7954707741737366, | |
| "eval_runtime": 4.2693, | |
| "eval_samples_per_second": 66.99, | |
| "eval_steps_per_second": 0.703, | |
| "step": 822 | |
| }, | |
| { | |
| "epoch": 253.85, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.7973347902297974, | |
| "eval_runtime": 4.1401, | |
| "eval_samples_per_second": 69.081, | |
| "eval_steps_per_second": 0.725, | |
| "step": 825 | |
| }, | |
| { | |
| "epoch": 254.77, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.8001494407653809, | |
| "eval_runtime": 4.4851, | |
| "eval_samples_per_second": 63.767, | |
| "eval_steps_per_second": 0.669, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 256.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.80350661277771, | |
| "eval_runtime": 4.4996, | |
| "eval_samples_per_second": 63.562, | |
| "eval_steps_per_second": 0.667, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 256.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.8035485148429871, | |
| "eval_runtime": 4.5713, | |
| "eval_samples_per_second": 62.564, | |
| "eval_steps_per_second": 0.656, | |
| "step": 835 | |
| }, | |
| { | |
| "epoch": 257.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8012282252311707, | |
| "eval_runtime": 4.0638, | |
| "eval_samples_per_second": 70.377, | |
| "eval_steps_per_second": 0.738, | |
| "step": 838 | |
| }, | |
| { | |
| "epoch": 258.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.8000492453575134, | |
| "eval_runtime": 4.443, | |
| "eval_samples_per_second": 64.372, | |
| "eval_steps_per_second": 0.675, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 260.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7963055968284607, | |
| "eval_runtime": 5.2655, | |
| "eval_samples_per_second": 54.316, | |
| "eval_steps_per_second": 0.57, | |
| "step": 845 | |
| }, | |
| { | |
| "epoch": 260.92, | |
| "eval_accuracy": 0.6958041958041958, | |
| "eval_loss": 0.7927840352058411, | |
| "eval_runtime": 5.1407, | |
| "eval_samples_per_second": 55.634, | |
| "eval_steps_per_second": 0.584, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 261.54, | |
| "grad_norm": 24108.591796875, | |
| "learning_rate": 6.349206349206349e-06, | |
| "loss": 0.5369, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 261.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7919009327888489, | |
| "eval_runtime": 3.8577, | |
| "eval_samples_per_second": 74.138, | |
| "eval_steps_per_second": 0.778, | |
| "step": 851 | |
| }, | |
| { | |
| "epoch": 262.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.791265606880188, | |
| "eval_runtime": 4.1966, | |
| "eval_samples_per_second": 68.151, | |
| "eval_steps_per_second": 0.715, | |
| "step": 854 | |
| }, | |
| { | |
| "epoch": 264.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7929325699806213, | |
| "eval_runtime": 4.063, | |
| "eval_samples_per_second": 70.391, | |
| "eval_steps_per_second": 0.738, | |
| "step": 858 | |
| }, | |
| { | |
| "epoch": 264.92, | |
| "eval_accuracy": 0.6818181818181818, | |
| "eval_loss": 0.7954928278923035, | |
| "eval_runtime": 4.3933, | |
| "eval_samples_per_second": 65.099, | |
| "eval_steps_per_second": 0.683, | |
| "step": 861 | |
| }, | |
| { | |
| "epoch": 265.85, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7962778210639954, | |
| "eval_runtime": 4.4424, | |
| "eval_samples_per_second": 64.38, | |
| "eval_steps_per_second": 0.675, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 266.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7951834201812744, | |
| "eval_runtime": 4.2605, | |
| "eval_samples_per_second": 67.128, | |
| "eval_steps_per_second": 0.704, | |
| "step": 867 | |
| }, | |
| { | |
| "epoch": 268.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7936495542526245, | |
| "eval_runtime": 4.9467, | |
| "eval_samples_per_second": 57.816, | |
| "eval_steps_per_second": 0.606, | |
| "step": 871 | |
| }, | |
| { | |
| "epoch": 268.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7928897738456726, | |
| "eval_runtime": 4.9925, | |
| "eval_samples_per_second": 57.286, | |
| "eval_steps_per_second": 0.601, | |
| "step": 874 | |
| }, | |
| { | |
| "epoch": 269.85, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7933365702629089, | |
| "eval_runtime": 4.4133, | |
| "eval_samples_per_second": 64.804, | |
| "eval_steps_per_second": 0.68, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 270.77, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7940818071365356, | |
| "eval_runtime": 4.0519, | |
| "eval_samples_per_second": 70.584, | |
| "eval_steps_per_second": 0.74, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 272.0, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7939559817314148, | |
| "eval_runtime": 4.2845, | |
| "eval_samples_per_second": 66.753, | |
| "eval_steps_per_second": 0.7, | |
| "step": 884 | |
| }, | |
| { | |
| "epoch": 272.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7929409742355347, | |
| "eval_runtime": 4.885, | |
| "eval_samples_per_second": 58.546, | |
| "eval_steps_per_second": 0.614, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 273.85, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7929646968841553, | |
| "eval_runtime": 3.7177, | |
| "eval_samples_per_second": 76.929, | |
| "eval_steps_per_second": 0.807, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 274.77, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7942932844161987, | |
| "eval_runtime": 4.7663, | |
| "eval_samples_per_second": 60.004, | |
| "eval_steps_per_second": 0.629, | |
| "step": 893 | |
| }, | |
| { | |
| "epoch": 276.0, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7943535447120667, | |
| "eval_runtime": 4.0017, | |
| "eval_samples_per_second": 71.47, | |
| "eval_steps_per_second": 0.75, | |
| "step": 897 | |
| }, | |
| { | |
| "epoch": 276.92, | |
| "grad_norm": 30744.533203125, | |
| "learning_rate": 4.7619047619047615e-06, | |
| "loss": 0.5388, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 276.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7933218479156494, | |
| "eval_runtime": 4.3013, | |
| "eval_samples_per_second": 66.492, | |
| "eval_steps_per_second": 0.697, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 277.85, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7914408445358276, | |
| "eval_runtime": 4.8732, | |
| "eval_samples_per_second": 58.689, | |
| "eval_steps_per_second": 0.616, | |
| "step": 903 | |
| }, | |
| { | |
| "epoch": 278.77, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7903594970703125, | |
| "eval_runtime": 4.6519, | |
| "eval_samples_per_second": 61.48, | |
| "eval_steps_per_second": 0.645, | |
| "step": 906 | |
| }, | |
| { | |
| "epoch": 280.0, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7888299822807312, | |
| "eval_runtime": 4.5788, | |
| "eval_samples_per_second": 62.462, | |
| "eval_steps_per_second": 0.655, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 280.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7900360822677612, | |
| "eval_runtime": 4.5971, | |
| "eval_samples_per_second": 62.213, | |
| "eval_steps_per_second": 0.653, | |
| "step": 913 | |
| }, | |
| { | |
| "epoch": 281.85, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7905992865562439, | |
| "eval_runtime": 4.4545, | |
| "eval_samples_per_second": 64.205, | |
| "eval_steps_per_second": 0.673, | |
| "step": 916 | |
| }, | |
| { | |
| "epoch": 282.77, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7911333441734314, | |
| "eval_runtime": 4.4274, | |
| "eval_samples_per_second": 64.598, | |
| "eval_steps_per_second": 0.678, | |
| "step": 919 | |
| }, | |
| { | |
| "epoch": 284.0, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7906560897827148, | |
| "eval_runtime": 3.9207, | |
| "eval_samples_per_second": 72.947, | |
| "eval_steps_per_second": 0.765, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 284.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7906984686851501, | |
| "eval_runtime": 4.5603, | |
| "eval_samples_per_second": 62.715, | |
| "eval_steps_per_second": 0.658, | |
| "step": 926 | |
| }, | |
| { | |
| "epoch": 285.85, | |
| "eval_accuracy": 0.6818181818181818, | |
| "eval_loss": 0.7905350923538208, | |
| "eval_runtime": 4.8134, | |
| "eval_samples_per_second": 59.418, | |
| "eval_steps_per_second": 0.623, | |
| "step": 929 | |
| }, | |
| { | |
| "epoch": 286.77, | |
| "eval_accuracy": 0.6818181818181818, | |
| "eval_loss": 0.7899833917617798, | |
| "eval_runtime": 4.0697, | |
| "eval_samples_per_second": 70.275, | |
| "eval_steps_per_second": 0.737, | |
| "step": 932 | |
| }, | |
| { | |
| "epoch": 288.0, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7901102304458618, | |
| "eval_runtime": 4.0126, | |
| "eval_samples_per_second": 71.276, | |
| "eval_steps_per_second": 0.748, | |
| "step": 936 | |
| }, | |
| { | |
| "epoch": 288.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7902336120605469, | |
| "eval_runtime": 3.8328, | |
| "eval_samples_per_second": 74.619, | |
| "eval_steps_per_second": 0.783, | |
| "step": 939 | |
| }, | |
| { | |
| "epoch": 289.85, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7909765839576721, | |
| "eval_runtime": 3.9497, | |
| "eval_samples_per_second": 72.411, | |
| "eval_steps_per_second": 0.76, | |
| "step": 942 | |
| }, | |
| { | |
| "epoch": 290.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7913976907730103, | |
| "eval_runtime": 4.7881, | |
| "eval_samples_per_second": 59.731, | |
| "eval_steps_per_second": 0.627, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 292.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7919970750808716, | |
| "eval_runtime": 4.0436, | |
| "eval_samples_per_second": 70.729, | |
| "eval_steps_per_second": 0.742, | |
| "step": 949 | |
| }, | |
| { | |
| "epoch": 292.31, | |
| "grad_norm": 41198.3515625, | |
| "learning_rate": 3.1746031746031746e-06, | |
| "loss": 0.5261, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 292.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7927921414375305, | |
| "eval_runtime": 3.9219, | |
| "eval_samples_per_second": 72.923, | |
| "eval_steps_per_second": 0.765, | |
| "step": 952 | |
| }, | |
| { | |
| "epoch": 293.85, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.793153703212738, | |
| "eval_runtime": 4.3649, | |
| "eval_samples_per_second": 65.522, | |
| "eval_steps_per_second": 0.687, | |
| "step": 955 | |
| }, | |
| { | |
| "epoch": 294.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7925400733947754, | |
| "eval_runtime": 4.2064, | |
| "eval_samples_per_second": 67.992, | |
| "eval_steps_per_second": 0.713, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 296.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7922278046607971, | |
| "eval_runtime": 4.03, | |
| "eval_samples_per_second": 70.968, | |
| "eval_steps_per_second": 0.744, | |
| "step": 962 | |
| }, | |
| { | |
| "epoch": 296.92, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7919090986251831, | |
| "eval_runtime": 4.4889, | |
| "eval_samples_per_second": 63.713, | |
| "eval_steps_per_second": 0.668, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 297.85, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7922202348709106, | |
| "eval_runtime": 4.3742, | |
| "eval_samples_per_second": 65.383, | |
| "eval_steps_per_second": 0.686, | |
| "step": 968 | |
| }, | |
| { | |
| "epoch": 298.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7921380400657654, | |
| "eval_runtime": 4.27, | |
| "eval_samples_per_second": 66.979, | |
| "eval_steps_per_second": 0.703, | |
| "step": 971 | |
| }, | |
| { | |
| "epoch": 300.0, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7912278175354004, | |
| "eval_runtime": 4.209, | |
| "eval_samples_per_second": 67.95, | |
| "eval_steps_per_second": 0.713, | |
| "step": 975 | |
| }, | |
| { | |
| "epoch": 300.92, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7907286882400513, | |
| "eval_runtime": 4.5975, | |
| "eval_samples_per_second": 62.208, | |
| "eval_steps_per_second": 0.653, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 301.85, | |
| "eval_accuracy": 0.6853146853146853, | |
| "eval_loss": 0.7895866632461548, | |
| "eval_runtime": 4.0629, | |
| "eval_samples_per_second": 70.394, | |
| "eval_steps_per_second": 0.738, | |
| "step": 981 | |
| }, | |
| { | |
| "epoch": 302.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7885376811027527, | |
| "eval_runtime": 4.0112, | |
| "eval_samples_per_second": 71.301, | |
| "eval_steps_per_second": 0.748, | |
| "step": 984 | |
| }, | |
| { | |
| "epoch": 304.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7877256870269775, | |
| "eval_runtime": 4.4199, | |
| "eval_samples_per_second": 64.708, | |
| "eval_steps_per_second": 0.679, | |
| "step": 988 | |
| }, | |
| { | |
| "epoch": 304.92, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7874112725257874, | |
| "eval_runtime": 4.0366, | |
| "eval_samples_per_second": 70.852, | |
| "eval_steps_per_second": 0.743, | |
| "step": 991 | |
| }, | |
| { | |
| "epoch": 305.85, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7876228094100952, | |
| "eval_runtime": 4.3519, | |
| "eval_samples_per_second": 65.718, | |
| "eval_steps_per_second": 0.689, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 306.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7879106402397156, | |
| "eval_runtime": 5.3443, | |
| "eval_samples_per_second": 53.515, | |
| "eval_steps_per_second": 0.561, | |
| "step": 997 | |
| }, | |
| { | |
| "epoch": 307.69, | |
| "grad_norm": 31167.6875, | |
| "learning_rate": 1.5873015873015873e-06, | |
| "loss": 0.5188, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 308.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7883804440498352, | |
| "eval_runtime": 4.1413, | |
| "eval_samples_per_second": 69.06, | |
| "eval_steps_per_second": 0.724, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 308.92, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7886692881584167, | |
| "eval_runtime": 4.049, | |
| "eval_samples_per_second": 70.634, | |
| "eval_steps_per_second": 0.741, | |
| "step": 1004 | |
| }, | |
| { | |
| "epoch": 309.85, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7890444397926331, | |
| "eval_runtime": 4.612, | |
| "eval_samples_per_second": 62.012, | |
| "eval_steps_per_second": 0.65, | |
| "step": 1007 | |
| }, | |
| { | |
| "epoch": 310.77, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7894096970558167, | |
| "eval_runtime": 3.8027, | |
| "eval_samples_per_second": 75.209, | |
| "eval_steps_per_second": 0.789, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 312.0, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7899323105812073, | |
| "eval_runtime": 4.3345, | |
| "eval_samples_per_second": 65.983, | |
| "eval_steps_per_second": 0.692, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 312.92, | |
| "eval_accuracy": 0.6888111888111889, | |
| "eval_loss": 0.7903538346290588, | |
| "eval_runtime": 4.5846, | |
| "eval_samples_per_second": 62.383, | |
| "eval_steps_per_second": 0.654, | |
| "step": 1017 | |
| }, | |
| { | |
| "epoch": 313.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7907257080078125, | |
| "eval_runtime": 4.136, | |
| "eval_samples_per_second": 69.148, | |
| "eval_steps_per_second": 0.725, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 314.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.790963888168335, | |
| "eval_runtime": 4.2526, | |
| "eval_samples_per_second": 67.252, | |
| "eval_steps_per_second": 0.705, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 316.0, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7912085056304932, | |
| "eval_runtime": 4.1188, | |
| "eval_samples_per_second": 69.437, | |
| "eval_steps_per_second": 0.728, | |
| "step": 1027 | |
| }, | |
| { | |
| "epoch": 316.92, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7911705374717712, | |
| "eval_runtime": 4.1524, | |
| "eval_samples_per_second": 68.876, | |
| "eval_steps_per_second": 0.722, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 317.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7911967039108276, | |
| "eval_runtime": 3.9058, | |
| "eval_samples_per_second": 73.225, | |
| "eval_steps_per_second": 0.768, | |
| "step": 1033 | |
| }, | |
| { | |
| "epoch": 318.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7912610173225403, | |
| "eval_runtime": 4.6095, | |
| "eval_samples_per_second": 62.046, | |
| "eval_steps_per_second": 0.651, | |
| "step": 1036 | |
| }, | |
| { | |
| "epoch": 320.0, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7912730574607849, | |
| "eval_runtime": 5.5705, | |
| "eval_samples_per_second": 51.342, | |
| "eval_steps_per_second": 0.539, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 320.92, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7911974787712097, | |
| "eval_runtime": 4.9154, | |
| "eval_samples_per_second": 58.185, | |
| "eval_steps_per_second": 0.61, | |
| "step": 1043 | |
| }, | |
| { | |
| "epoch": 321.85, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7911575436592102, | |
| "eval_runtime": 4.8387, | |
| "eval_samples_per_second": 59.107, | |
| "eval_steps_per_second": 0.62, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 322.77, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7911355495452881, | |
| "eval_runtime": 4.1368, | |
| "eval_samples_per_second": 69.135, | |
| "eval_steps_per_second": 0.725, | |
| "step": 1049 | |
| }, | |
| { | |
| "epoch": 323.08, | |
| "grad_norm": 53824.44140625, | |
| "learning_rate": 0.0, | |
| "loss": 0.5194, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 323.08, | |
| "eval_accuracy": 0.6923076923076923, | |
| "eval_loss": 0.7911302447319031, | |
| "eval_runtime": 4.2304, | |
| "eval_samples_per_second": 67.606, | |
| "eval_steps_per_second": 0.709, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 323.08, | |
| "step": 1050, | |
| "total_flos": 4.380490432252032e+18, | |
| "train_loss": 0.8143934268043155, | |
| "train_runtime": 4784.9132, | |
| "train_samples_per_second": 113.231, | |
| "train_steps_per_second": 0.219 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 1050, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 350, | |
| "save_steps": 500, | |
| "total_flos": 4.380490432252032e+18, | |
| "train_batch_size": 128, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |