| { | |
| "best_metric": 0.7207300066947937, | |
| "best_model_checkpoint": "/raildefectfft2/checkpoint-30", | |
| "epoch": 30.0, | |
| "global_step": 450, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00019555555555555556, | |
| "loss": 1.3922, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 0.6114285714285714, | |
| "eval_loss": 1.1690133810043335, | |
| "eval_runtime": 217.3963, | |
| "eval_samples_per_second": 1.61, | |
| "eval_steps_per_second": 0.202, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 0.00019111111111111114, | |
| "loss": 0.8518, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "eval_accuracy": 0.6828571428571428, | |
| "eval_loss": 0.8874489068984985, | |
| "eval_runtime": 217.0024, | |
| "eval_samples_per_second": 1.613, | |
| "eval_steps_per_second": 0.203, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.0001866666666666667, | |
| "loss": 0.5386, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.7542857142857143, | |
| "eval_loss": 0.7207300066947937, | |
| "eval_runtime": 221.2829, | |
| "eval_samples_per_second": 1.582, | |
| "eval_steps_per_second": 0.199, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "learning_rate": 0.00018222222222222224, | |
| "loss": 0.3125, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_accuracy": 0.7285714285714285, | |
| "eval_loss": 0.8382583260536194, | |
| "eval_runtime": 217.2612, | |
| "eval_samples_per_second": 1.611, | |
| "eval_steps_per_second": 0.203, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "learning_rate": 0.00017777777777777779, | |
| "loss": 0.2264, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 3.33, | |
| "eval_accuracy": 0.7428571428571429, | |
| "eval_loss": 0.8440293669700623, | |
| "eval_runtime": 215.3358, | |
| "eval_samples_per_second": 1.625, | |
| "eval_steps_per_second": 0.204, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 0.00017333333333333334, | |
| "loss": 0.1613, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7457142857142857, | |
| "eval_loss": 0.8516280055046082, | |
| "eval_runtime": 221.3905, | |
| "eval_samples_per_second": 1.581, | |
| "eval_steps_per_second": 0.199, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 0.00016888888888888889, | |
| "loss": 0.119, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "eval_accuracy": 0.6, | |
| "eval_loss": 1.3625210523605347, | |
| "eval_runtime": 210.1415, | |
| "eval_samples_per_second": 1.666, | |
| "eval_steps_per_second": 0.209, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 0.00016444444444444444, | |
| "loss": 0.0972, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "eval_accuracy": 0.7428571428571429, | |
| "eval_loss": 0.9109606146812439, | |
| "eval_runtime": 208.8901, | |
| "eval_samples_per_second": 1.676, | |
| "eval_steps_per_second": 0.211, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 0.00016, | |
| "loss": 0.0844, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.78, | |
| "eval_loss": 0.8271930813789368, | |
| "eval_runtime": 214.6994, | |
| "eval_samples_per_second": 1.63, | |
| "eval_steps_per_second": 0.205, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "learning_rate": 0.00015555555555555556, | |
| "loss": 0.0725, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 6.67, | |
| "eval_accuracy": 0.74, | |
| "eval_loss": 0.8958377242088318, | |
| "eval_runtime": 209.0619, | |
| "eval_samples_per_second": 1.674, | |
| "eval_steps_per_second": 0.21, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "learning_rate": 0.0001511111111111111, | |
| "loss": 0.0708, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 7.33, | |
| "eval_accuracy": 0.7371428571428571, | |
| "eval_loss": 1.0972360372543335, | |
| "eval_runtime": 210.819, | |
| "eval_samples_per_second": 1.66, | |
| "eval_steps_per_second": 0.209, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 0.00014666666666666666, | |
| "loss": 0.041, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7628571428571429, | |
| "eval_loss": 1.0088900327682495, | |
| "eval_runtime": 209.2911, | |
| "eval_samples_per_second": 1.672, | |
| "eval_steps_per_second": 0.21, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "learning_rate": 0.00014222222222222224, | |
| "loss": 0.0312, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 8.67, | |
| "eval_accuracy": 0.7628571428571429, | |
| "eval_loss": 1.03481125831604, | |
| "eval_runtime": 215.9471, | |
| "eval_samples_per_second": 1.621, | |
| "eval_steps_per_second": 0.204, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "learning_rate": 0.0001377777777777778, | |
| "loss": 0.0401, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 9.33, | |
| "eval_accuracy": 0.7257142857142858, | |
| "eval_loss": 1.2426719665527344, | |
| "eval_runtime": 211.184, | |
| "eval_samples_per_second": 1.657, | |
| "eval_steps_per_second": 0.208, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.00013333333333333334, | |
| "loss": 0.0271, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7542857142857143, | |
| "eval_loss": 1.0153539180755615, | |
| "eval_runtime": 208.7196, | |
| "eval_samples_per_second": 1.677, | |
| "eval_steps_per_second": 0.211, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "learning_rate": 0.00012888888888888892, | |
| "loss": 0.0328, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 10.67, | |
| "eval_accuracy": 0.7714285714285715, | |
| "eval_loss": 1.0373210906982422, | |
| "eval_runtime": 216.803, | |
| "eval_samples_per_second": 1.614, | |
| "eval_steps_per_second": 0.203, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 11.33, | |
| "learning_rate": 0.00012444444444444444, | |
| "loss": 0.023, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 11.33, | |
| "eval_accuracy": 0.7685714285714286, | |
| "eval_loss": 1.005110502243042, | |
| "eval_runtime": 209.183, | |
| "eval_samples_per_second": 1.673, | |
| "eval_steps_per_second": 0.21, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 0.00012, | |
| "loss": 0.0199, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7657142857142857, | |
| "eval_loss": 0.9775477647781372, | |
| "eval_runtime": 208.6639, | |
| "eval_samples_per_second": 1.677, | |
| "eval_steps_per_second": 0.211, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 12.67, | |
| "learning_rate": 0.00011555555555555555, | |
| "loss": 0.0189, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 12.67, | |
| "eval_accuracy": 0.7657142857142857, | |
| "eval_loss": 1.008815050125122, | |
| "eval_runtime": 216.3653, | |
| "eval_samples_per_second": 1.618, | |
| "eval_steps_per_second": 0.203, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 0.00011111111111111112, | |
| "loss": 0.0188, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "eval_accuracy": 0.7342857142857143, | |
| "eval_loss": 1.1904319524765015, | |
| "eval_runtime": 209.8186, | |
| "eval_samples_per_second": 1.668, | |
| "eval_steps_per_second": 0.21, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 0.00010666666666666667, | |
| "loss": 0.0167, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7285714285714285, | |
| "eval_loss": 1.2999135255813599, | |
| "eval_runtime": 209.7587, | |
| "eval_samples_per_second": 1.669, | |
| "eval_steps_per_second": 0.21, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "learning_rate": 0.00010222222222222222, | |
| "loss": 0.0159, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.1326370239257812, | |
| "eval_runtime": 215.8574, | |
| "eval_samples_per_second": 1.621, | |
| "eval_steps_per_second": 0.204, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 15.33, | |
| "learning_rate": 9.777777777777778e-05, | |
| "loss": 0.0145, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 15.33, | |
| "eval_accuracy": 0.7542857142857143, | |
| "eval_loss": 1.1385791301727295, | |
| "eval_runtime": 209.3052, | |
| "eval_samples_per_second": 1.672, | |
| "eval_steps_per_second": 0.21, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 9.333333333333334e-05, | |
| "loss": 0.015, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7542857142857143, | |
| "eval_loss": 1.1441489458084106, | |
| "eval_runtime": 209.1403, | |
| "eval_samples_per_second": 1.674, | |
| "eval_steps_per_second": 0.21, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 16.67, | |
| "learning_rate": 8.888888888888889e-05, | |
| "loss": 0.0133, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 16.67, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.154445767402649, | |
| "eval_runtime": 212.9184, | |
| "eval_samples_per_second": 1.644, | |
| "eval_steps_per_second": 0.207, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 17.33, | |
| "learning_rate": 8.444444444444444e-05, | |
| "loss": 0.0132, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 17.33, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.1629431247711182, | |
| "eval_runtime": 209.529, | |
| "eval_samples_per_second": 1.67, | |
| "eval_steps_per_second": 0.21, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 8e-05, | |
| "loss": 0.0121, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.1707779169082642, | |
| "eval_runtime": 209.7822, | |
| "eval_samples_per_second": 1.668, | |
| "eval_steps_per_second": 0.21, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 18.67, | |
| "learning_rate": 7.555555555555556e-05, | |
| "loss": 0.0121, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 18.67, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.1773394346237183, | |
| "eval_runtime": 220.9849, | |
| "eval_samples_per_second": 1.584, | |
| "eval_steps_per_second": 0.199, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 19.33, | |
| "learning_rate": 7.111111111111112e-05, | |
| "loss": 0.0114, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 19.33, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.1831494569778442, | |
| "eval_runtime": 216.1426, | |
| "eval_samples_per_second": 1.619, | |
| "eval_steps_per_second": 0.204, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 6.666666666666667e-05, | |
| "loss": 0.0111, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.1883198022842407, | |
| "eval_runtime": 210.1269, | |
| "eval_samples_per_second": 1.666, | |
| "eval_steps_per_second": 0.209, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 20.67, | |
| "learning_rate": 6.222222222222222e-05, | |
| "loss": 0.011, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 20.67, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.1936721801757812, | |
| "eval_runtime": 210.5377, | |
| "eval_samples_per_second": 1.662, | |
| "eval_steps_per_second": 0.209, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 21.33, | |
| "learning_rate": 5.7777777777777776e-05, | |
| "loss": 0.0103, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 21.33, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.1992676258087158, | |
| "eval_runtime": 214.4415, | |
| "eval_samples_per_second": 1.632, | |
| "eval_steps_per_second": 0.205, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 5.333333333333333e-05, | |
| "loss": 0.0103, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.2045563459396362, | |
| "eval_runtime": 211.851, | |
| "eval_samples_per_second": 1.652, | |
| "eval_steps_per_second": 0.208, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 22.67, | |
| "learning_rate": 4.888888888888889e-05, | |
| "loss": 0.0103, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 22.67, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.208925724029541, | |
| "eval_runtime": 209.6112, | |
| "eval_samples_per_second": 1.67, | |
| "eval_steps_per_second": 0.21, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 23.33, | |
| "learning_rate": 4.4444444444444447e-05, | |
| "loss": 0.0096, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 23.33, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.2132576704025269, | |
| "eval_runtime": 219.2216, | |
| "eval_samples_per_second": 1.597, | |
| "eval_steps_per_second": 0.201, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 4e-05, | |
| "loss": 0.0095, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.2171136140823364, | |
| "eval_runtime": 208.9362, | |
| "eval_samples_per_second": 1.675, | |
| "eval_steps_per_second": 0.211, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 24.67, | |
| "learning_rate": 3.555555555555556e-05, | |
| "loss": 0.0096, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 24.67, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.2204023599624634, | |
| "eval_runtime": 209.9857, | |
| "eval_samples_per_second": 1.667, | |
| "eval_steps_per_second": 0.21, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "learning_rate": 3.111111111111111e-05, | |
| "loss": 0.0093, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "eval_accuracy": 0.7485714285714286, | |
| "eval_loss": 1.2234961986541748, | |
| "eval_runtime": 216.2311, | |
| "eval_samples_per_second": 1.619, | |
| "eval_steps_per_second": 0.203, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 2.6666666666666667e-05, | |
| "loss": 0.0091, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7485714285714286, | |
| "eval_loss": 1.2261521816253662, | |
| "eval_runtime": 210.1553, | |
| "eval_samples_per_second": 1.665, | |
| "eval_steps_per_second": 0.209, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 26.67, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 0.0092, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 26.67, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.227960228919983, | |
| "eval_runtime": 211.3289, | |
| "eval_samples_per_second": 1.656, | |
| "eval_steps_per_second": 0.208, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 27.33, | |
| "learning_rate": 1.777777777777778e-05, | |
| "loss": 0.0089, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 27.33, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.2296239137649536, | |
| "eval_runtime": 213.6977, | |
| "eval_samples_per_second": 1.638, | |
| "eval_steps_per_second": 0.206, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 1.3333333333333333e-05, | |
| "loss": 0.0092, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7514285714285714, | |
| "eval_loss": 1.2309640645980835, | |
| "eval_runtime": 210.9587, | |
| "eval_samples_per_second": 1.659, | |
| "eval_steps_per_second": 0.209, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 28.67, | |
| "learning_rate": 8.88888888888889e-06, | |
| "loss": 0.0089, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 28.67, | |
| "eval_accuracy": 0.7485714285714286, | |
| "eval_loss": 1.2319449186325073, | |
| "eval_runtime": 210.158, | |
| "eval_samples_per_second": 1.665, | |
| "eval_steps_per_second": 0.209, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 29.33, | |
| "learning_rate": 4.444444444444445e-06, | |
| "loss": 0.0089, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 29.33, | |
| "eval_accuracy": 0.7485714285714286, | |
| "eval_loss": 1.23252272605896, | |
| "eval_runtime": 216.1534, | |
| "eval_samples_per_second": 1.619, | |
| "eval_steps_per_second": 0.204, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0088, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7485714285714286, | |
| "eval_loss": 1.2327271699905396, | |
| "eval_runtime": 216.0649, | |
| "eval_samples_per_second": 1.62, | |
| "eval_steps_per_second": 0.204, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 450, | |
| "total_flos": 9.8805828893184e+17, | |
| "train_loss": 0.09906705205639203, | |
| "train_runtime": 36217.9644, | |
| "train_samples_per_second": 0.352, | |
| "train_steps_per_second": 0.012 | |
| } | |
| ], | |
| "max_steps": 450, | |
| "num_train_epochs": 30, | |
| "total_flos": 9.8805828893184e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |