| { | |
| "best_metric": 0.8431372549019608, | |
| "best_model_checkpoint": "vit-base-patch16-224-U8-40d\\checkpoint-200", | |
| "epoch": 40.0, | |
| "eval_steps": 500, | |
| "global_step": 800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 1.5e-05, | |
| "loss": 1.3819, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 3e-05, | |
| "loss": 1.3419, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.47058823529411764, | |
| "eval_loss": 1.299819827079773, | |
| "eval_runtime": 0.7952, | |
| "eval_samples_per_second": 64.133, | |
| "eval_steps_per_second": 2.515, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 4.5e-05, | |
| "loss": 1.263, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 6e-05, | |
| "loss": 1.1313, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5686274509803921, | |
| "eval_loss": 1.0832325220108032, | |
| "eval_runtime": 0.8195, | |
| "eval_samples_per_second": 62.236, | |
| "eval_steps_per_second": 2.441, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 5.921052631578947e-05, | |
| "loss": 0.978, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 5.842105263157895e-05, | |
| "loss": 0.7969, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 0.809360682964325, | |
| "eval_runtime": 0.8396, | |
| "eval_samples_per_second": 60.742, | |
| "eval_steps_per_second": 2.382, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 3.5, | |
| "learning_rate": 5.7631578947368423e-05, | |
| "loss": 0.6378, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 5.684210526315789e-05, | |
| "loss": 0.5063, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 0.6573488116264343, | |
| "eval_runtime": 0.7944, | |
| "eval_samples_per_second": 64.199, | |
| "eval_steps_per_second": 2.518, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 4.5, | |
| "learning_rate": 5.605263157894737e-05, | |
| "loss": 0.3989, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 5.5263157894736845e-05, | |
| "loss": 0.3367, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.6389498114585876, | |
| "eval_runtime": 0.7895, | |
| "eval_samples_per_second": 64.594, | |
| "eval_steps_per_second": 2.533, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 5.5, | |
| "learning_rate": 5.447368421052632e-05, | |
| "loss": 0.2707, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 5.368421052631579e-05, | |
| "loss": 0.242, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7450980392156863, | |
| "eval_loss": 0.6878873705863953, | |
| "eval_runtime": 0.8009, | |
| "eval_samples_per_second": 63.676, | |
| "eval_steps_per_second": 2.497, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 6.5, | |
| "learning_rate": 5.289473684210526e-05, | |
| "loss": 0.2263, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 5.210526315789474e-05, | |
| "loss": 0.1881, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7058823529411765, | |
| "eval_loss": 0.7939884066581726, | |
| "eval_runtime": 0.8019, | |
| "eval_samples_per_second": 63.596, | |
| "eval_steps_per_second": 2.494, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 5.131578947368421e-05, | |
| "loss": 0.2096, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 5.052631578947368e-05, | |
| "loss": 0.1561, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.8029699325561523, | |
| "eval_runtime": 0.7855, | |
| "eval_samples_per_second": 64.927, | |
| "eval_steps_per_second": 2.546, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 4.973684210526316e-05, | |
| "loss": 0.1468, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 4.8947368421052635e-05, | |
| "loss": 0.1557, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.8235294117647058, | |
| "eval_loss": 0.7004449367523193, | |
| "eval_runtime": 0.797, | |
| "eval_samples_per_second": 63.992, | |
| "eval_steps_per_second": 2.509, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 9.5, | |
| "learning_rate": 4.815789473684211e-05, | |
| "loss": 0.1385, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 4.736842105263158e-05, | |
| "loss": 0.1154, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.8431372549019608, | |
| "eval_loss": 0.649506688117981, | |
| "eval_runtime": 0.7901, | |
| "eval_samples_per_second": 64.552, | |
| "eval_steps_per_second": 2.531, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 10.5, | |
| "learning_rate": 4.657894736842105e-05, | |
| "loss": 0.1235, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 4.5789473684210527e-05, | |
| "loss": 0.1469, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7058823529411765, | |
| "eval_loss": 1.1387523412704468, | |
| "eval_runtime": 0.7986, | |
| "eval_samples_per_second": 63.858, | |
| "eval_steps_per_second": 2.504, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "learning_rate": 4.5e-05, | |
| "loss": 0.1223, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 4.421052631578947e-05, | |
| "loss": 0.0898, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.7966563105583191, | |
| "eval_runtime": 0.8161, | |
| "eval_samples_per_second": 62.495, | |
| "eval_steps_per_second": 2.451, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 4.342105263157895e-05, | |
| "loss": 0.1042, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 4.2631578947368425e-05, | |
| "loss": 0.0719, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.8934146165847778, | |
| "eval_runtime": 0.7865, | |
| "eval_samples_per_second": 64.845, | |
| "eval_steps_per_second": 2.543, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 13.5, | |
| "learning_rate": 4.1842105263157894e-05, | |
| "loss": 0.0927, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 4.105263157894737e-05, | |
| "loss": 0.0739, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.8476159572601318, | |
| "eval_runtime": 0.8141, | |
| "eval_samples_per_second": 62.643, | |
| "eval_steps_per_second": 2.457, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 14.5, | |
| "learning_rate": 4.026315789473684e-05, | |
| "loss": 0.088, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 3.9473684210526316e-05, | |
| "loss": 0.0823, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.9692044854164124, | |
| "eval_runtime": 0.82, | |
| "eval_samples_per_second": 62.196, | |
| "eval_steps_per_second": 2.439, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 15.5, | |
| "learning_rate": 3.868421052631579e-05, | |
| "loss": 0.0714, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 3.789473684210526e-05, | |
| "loss": 0.0828, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 0.9384645819664001, | |
| "eval_runtime": 0.8043, | |
| "eval_samples_per_second": 63.407, | |
| "eval_steps_per_second": 2.487, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 16.5, | |
| "learning_rate": 3.710526315789474e-05, | |
| "loss": 0.0762, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 3.6315789473684214e-05, | |
| "loss": 0.0761, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7254901960784313, | |
| "eval_loss": 1.1684223413467407, | |
| "eval_runtime": 0.8011, | |
| "eval_samples_per_second": 63.661, | |
| "eval_steps_per_second": 2.496, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "learning_rate": 3.5526315789473684e-05, | |
| "loss": 0.0925, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 3.473684210526316e-05, | |
| "loss": 0.0597, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 0.9413917660713196, | |
| "eval_runtime": 0.8568, | |
| "eval_samples_per_second": 59.521, | |
| "eval_steps_per_second": 2.334, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 18.5, | |
| "learning_rate": 3.394736842105263e-05, | |
| "loss": 0.0806, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 3.3157894736842106e-05, | |
| "loss": 0.0727, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.7058823529411765, | |
| "eval_loss": 1.020107388496399, | |
| "eval_runtime": 0.8388, | |
| "eval_samples_per_second": 60.801, | |
| "eval_steps_per_second": 2.384, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 19.5, | |
| "learning_rate": 3.236842105263158e-05, | |
| "loss": 0.0789, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 3.157894736842105e-05, | |
| "loss": 0.0507, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.8562985062599182, | |
| "eval_runtime": 0.7915, | |
| "eval_samples_per_second": 64.435, | |
| "eval_steps_per_second": 2.527, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 20.5, | |
| "learning_rate": 3.078947368421053e-05, | |
| "loss": 0.0557, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 3e-05, | |
| "loss": 0.0587, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 0.8476255536079407, | |
| "eval_runtime": 0.7773, | |
| "eval_samples_per_second": 65.611, | |
| "eval_steps_per_second": 2.573, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 21.5, | |
| "learning_rate": 2.9210526315789474e-05, | |
| "loss": 0.0615, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 2.8421052631578946e-05, | |
| "loss": 0.0608, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.9399316310882568, | |
| "eval_runtime": 0.8324, | |
| "eval_samples_per_second": 61.27, | |
| "eval_steps_per_second": 2.403, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 22.5, | |
| "learning_rate": 2.7631578947368423e-05, | |
| "loss": 0.0505, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 2.6842105263157896e-05, | |
| "loss": 0.055, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.7450980392156863, | |
| "eval_loss": 0.8819794654846191, | |
| "eval_runtime": 0.7867, | |
| "eval_samples_per_second": 64.824, | |
| "eval_steps_per_second": 2.542, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 23.5, | |
| "learning_rate": 2.605263157894737e-05, | |
| "loss": 0.0682, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 2.526315789473684e-05, | |
| "loss": 0.0619, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 1.0459517240524292, | |
| "eval_runtime": 0.7938, | |
| "eval_samples_per_second": 64.25, | |
| "eval_steps_per_second": 2.52, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 24.5, | |
| "learning_rate": 2.4473684210526318e-05, | |
| "loss": 0.0448, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 2.368421052631579e-05, | |
| "loss": 0.0615, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.8235294117647058, | |
| "eval_loss": 0.9392306804656982, | |
| "eval_runtime": 0.7883, | |
| "eval_samples_per_second": 64.697, | |
| "eval_steps_per_second": 2.537, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 25.5, | |
| "learning_rate": 2.2894736842105263e-05, | |
| "loss": 0.0488, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 2.2105263157894736e-05, | |
| "loss": 0.0455, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.8235294117647058, | |
| "eval_loss": 0.9267483353614807, | |
| "eval_runtime": 0.7948, | |
| "eval_samples_per_second": 64.17, | |
| "eval_steps_per_second": 2.516, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 26.5, | |
| "learning_rate": 2.1315789473684212e-05, | |
| "loss": 0.0493, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 2.0526315789473685e-05, | |
| "loss": 0.0567, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 0.9784489870071411, | |
| "eval_runtime": 0.8082, | |
| "eval_samples_per_second": 63.101, | |
| "eval_steps_per_second": 2.475, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 27.5, | |
| "learning_rate": 1.9736842105263158e-05, | |
| "loss": 0.0467, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 1.894736842105263e-05, | |
| "loss": 0.032, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 1.1540778875350952, | |
| "eval_runtime": 0.7989, | |
| "eval_samples_per_second": 63.834, | |
| "eval_steps_per_second": 2.503, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 28.5, | |
| "learning_rate": 1.8157894736842107e-05, | |
| "loss": 0.0242, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "learning_rate": 1.736842105263158e-05, | |
| "loss": 0.0276, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 0.8864995837211609, | |
| "eval_runtime": 0.8109, | |
| "eval_samples_per_second": 62.894, | |
| "eval_steps_per_second": 2.466, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 29.5, | |
| "learning_rate": 1.6578947368421053e-05, | |
| "loss": 0.058, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 1.5789473684210526e-05, | |
| "loss": 0.0368, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 1.0847781896591187, | |
| "eval_runtime": 0.7819, | |
| "eval_samples_per_second": 65.229, | |
| "eval_steps_per_second": 2.558, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 30.5, | |
| "learning_rate": 1.5e-05, | |
| "loss": 0.0479, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "learning_rate": 1.4210526315789473e-05, | |
| "loss": 0.0342, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.9638255834579468, | |
| "eval_runtime": 0.7841, | |
| "eval_samples_per_second": 65.044, | |
| "eval_steps_per_second": 2.551, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 31.5, | |
| "learning_rate": 1.3421052631578948e-05, | |
| "loss": 0.0352, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 1.263157894736842e-05, | |
| "loss": 0.037, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 0.961588978767395, | |
| "eval_runtime": 0.7905, | |
| "eval_samples_per_second": 64.516, | |
| "eval_steps_per_second": 2.53, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 32.5, | |
| "learning_rate": 1.1842105263157895e-05, | |
| "loss": 0.0295, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "learning_rate": 1.1052631578947368e-05, | |
| "loss": 0.0371, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 1.0072985887527466, | |
| "eval_runtime": 0.7709, | |
| "eval_samples_per_second": 66.152, | |
| "eval_steps_per_second": 2.594, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 33.5, | |
| "learning_rate": 1.0263157894736843e-05, | |
| "loss": 0.0299, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 9.473684210526315e-06, | |
| "loss": 0.0371, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 1.0493559837341309, | |
| "eval_runtime": 0.7834, | |
| "eval_samples_per_second": 65.102, | |
| "eval_steps_per_second": 2.553, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 34.5, | |
| "learning_rate": 8.68421052631579e-06, | |
| "loss": 0.0217, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 7.894736842105263e-06, | |
| "loss": 0.0359, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 1.1287018060684204, | |
| "eval_runtime": 0.7799, | |
| "eval_samples_per_second": 65.39, | |
| "eval_steps_per_second": 2.564, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 35.5, | |
| "learning_rate": 7.105263157894737e-06, | |
| "loss": 0.0483, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 6.31578947368421e-06, | |
| "loss": 0.0255, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7647058823529411, | |
| "eval_loss": 1.1830930709838867, | |
| "eval_runtime": 0.7736, | |
| "eval_samples_per_second": 65.928, | |
| "eval_steps_per_second": 2.585, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 36.5, | |
| "learning_rate": 5.526315789473684e-06, | |
| "loss": 0.026, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "learning_rate": 4.736842105263158e-06, | |
| "loss": 0.0269, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 1.1609560251235962, | |
| "eval_runtime": 0.8022, | |
| "eval_samples_per_second": 63.578, | |
| "eval_steps_per_second": 2.493, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 37.5, | |
| "learning_rate": 3.9473684210526315e-06, | |
| "loss": 0.023, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "learning_rate": 3.157894736842105e-06, | |
| "loss": 0.0292, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.7843137254901961, | |
| "eval_loss": 1.1842255592346191, | |
| "eval_runtime": 0.815, | |
| "eval_samples_per_second": 62.574, | |
| "eval_steps_per_second": 2.454, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 38.5, | |
| "learning_rate": 2.368421052631579e-06, | |
| "loss": 0.0328, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "learning_rate": 1.5789473684210526e-06, | |
| "loss": 0.0161, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 1.109218716621399, | |
| "eval_runtime": 0.7881, | |
| "eval_samples_per_second": 64.712, | |
| "eval_steps_per_second": 2.538, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 39.5, | |
| "learning_rate": 7.894736842105263e-07, | |
| "loss": 0.0197, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0333, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.803921568627451, | |
| "eval_loss": 1.1185595989227295, | |
| "eval_runtime": 0.7848, | |
| "eval_samples_per_second": 64.984, | |
| "eval_steps_per_second": 2.548, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "step": 800, | |
| "total_flos": 7.873327274596762e+18, | |
| "train_loss": 0.17486795043572784, | |
| "train_runtime": 1567.8548, | |
| "train_samples_per_second": 64.802, | |
| "train_steps_per_second": 0.51 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 800, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 40, | |
| "save_steps": 500, | |
| "total_flos": 7.873327274596762e+18, | |
| "train_batch_size": 32, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |